# !diagnostics off

# Set the directory to the location of the unzipped files
t = try(setwd("E:/Dropbox/Research/pill/PillUptakeReplication"), silent=T)
if (inherits(t, "try-error")) setwd("C:/Dropbox/Research/pill/PillUptakeReplication")

# Month assumed for starting and ending contraceptive use when month not remembered
AssumedStartMonth = 7
AssumedFinishMonth = 6

library(tidyverse, quietly = T)
theme_set(theme_minimal())
library(lubridate)
library(stringr)

# Mode function that works with strings
Mode <- function(x) {
  ux <- unique(x)
  ux[which.max(tabulate(match(x, ux)))]
}

load("ADA_00497_AFP_F_STATA.RData")
# One of the variables was misnamed (it follows a different pattern from others)
x = as_tibble(x) %>% rename(q26ay1st=q26ayist)

aom = read_csv("Australia_Laws_WithOverseas.csv")


x$SurveyMonth = ifelse(x$monint<=12, x$monint, x$monint-12)
x$SurveyYear = ifelse(x$monint<=12, 1986, 1987)
x$SurveyMonthString = month.abb[x$SurveyMonth]
x$SurveyDate = ymd(paste(x$SurveyYear, x$SurveyMonth, "01", sep="-"))

# Basic demographics

# Change birth months and years to numeric
x$q1b1 = as.numeric(x$q1b1)
x$q1b2 = as.numeric(x$q1b2)

x$Age = as.numeric(x$q1a)
x$BornYear = x$q1b2+1900
x$BornMonth = x$q1b1
x$BornDate = ymd(paste(x$BornYear,x$BornMonth,"01",sep="-"))
# x$BornMonth[x$q1b1==99] = NA

# Remove the two people who did not remember their birth month
# They were born in 1932 and 1933 and so are not useful for the paper
x = filter(x, q1b1<99)

# Catholic upbrining
x$Catholic = ifelse(x$q21a==1, 1, 0)

# Born in Australia
x$BornInA = ifelse(x$q1c==1, 1, 0)


MonthNumString = c("01","02","03","04","05","06","07","08","09","10","11","12")
x$BornMonthString = MonthNumString[x$BornMonth]

# DaysInMonthNum = c(31,28,31,30,31,30,31,31,30,31,30,31)
# DaysInMonthStr = as.character(DaysInMonthNum)
# DayString = c("01","02","03","04","05","06","07","08","09","10", 
#               "11","12","13","14","15","16","17","18","19","20", 
#               "21","22","23","24","25","26","27","28","29","30", "31")

places = tibble(
  StateCodeMerge = c(rep("NSW",5), rep("Vic",5), rep("Qld",5), rep("SA",4), rep("WA",4), 
                     rep("Tas",4), rep("NT",4), rep("ACT",2), NA, "Overseas",NA),
  placeCode = c(11, 12, 13, 14, 18, 21, 22, 23, 24, 28, 31, 32, 33, 34, 38, 
                41, 43, 44, 48, 51, 53, 54, 58, 61, 63, 64, 68, 71, 73, 74, 78, 
                81, 84, 88, 98, 99)
)

states = tibble(
  state = c(1:6,8, NA),
  StateCodeMerge = c("NSW", "Vic", "Qld", "SA", "WA", "Tas", "ACT", NA)
)


# # No one was interviewed on February 29.
# # 1986 was not a leap year
# # Assign the survey day as the minimum birth day 
# # for people who report not yet having their birthday that month.
# # Use the survey day instead of the survey day + 1 because
# # Some people born in May and surveyed on 31 May 1986 report that 
# # their age is less than the survey year minus their birth year. See:
# # x[x$BornDayMin==32, c("Age","BornYear","BornMonth","Month","Year","dayint")]
# # x$Age<x$SurveyYear-x$BornYear means that she has not had her birthday yet this year
# x$BornDayMin = ifelse(x$SurveyMonth==x$BornMonth, ifelse(x$Age<x$SurveyYear-x$BornYear, x$dayint, 1), 1)
# x$BornDayMax = ifelse(x$SurveyMonth==x$BornMonth, 
#                       ifelse(x$Age<x$SurveyYear-x$BornYear, DaysInMonthNum[x$BornMonth], x$dayint), 
#                       DaysInMonthNum[x$BornMonth])
# 
# x$BornStringMin = ymd(paste(x$BornYear,x$BornMonthString,DayString[x$BornDayMin],sep="-"))
# x$BornStringMax = ymd(paste(x$BornYear,x$BornMonthString,DayString[x$BornDayMax],sep="-"))


# # Some people do not know when they moved (code 99)
# data.frame(
#   Home = 0:13,
#   varnameTime = c("q1d2","q97ay","q106b1st","q106b2nd","q106b3rd", "q106b4th",
#                   "q106b5th", "q106b6th", "q106b7th", "q106b8th", "q106b9th",
#                   "q106b10h", "q106b11h", "q106b12h"),
#   Missing = c(sum(x$q1d2==99 & !is.na(x$q1d2)),
#               sum(x$q97ay==99 & !is.na(x$q97ay)),
#               sum(x$q106b1st==99 & !is.na(x$q106b1st)),
#               sum(x$q106b2nd==99 & !is.na(x$q106b2nd)),
#               sum(x$q106b3rd==99 & !is.na(x$q106b3rd)),
#               sum(x$q106b4th==99 & !is.na(x$q106b4th)),
#               sum(x$q106b5th==99 & !is.na(x$q106b5th)),
#               sum(x$q106b6th==99 & !is.na(x$q106b6th)),
#               sum(x$q106b7th==99 & !is.na(x$q106b7th)),
#               sum(x$q106b8th==99 & !is.na(x$q106b8th)),
#               sum(x$q106b9th==99 & !is.na(x$q106b9th)),
#               sum(x$q106b10h==99 & !is.na(x$q106b10h)),
#               sum(x$q106b11h==99 & !is.na(x$q106b11h)),
#               sum(x$q106b12h==99 & !is.na(x$q106b12h)))
# )
# 
# # Look at the people who do not know when they moved
x %>%
  filter(q106b1st==99 | q106b2nd==99 | q106b3rd==99 | q106b4th==99 | q106b4th==99 | 
           q106b5th==99 | q106b6th==99 | q106b7th==99 | q106b8th==99 | q106b9th==99 | 
           q106b10h==99 | q106b11h==99 | q106b12h==99) %>%
  select("id","BornYear", BornMonth,"q97ay","q106b1st","q106b2nd","q106b3rd", "q106b4th", "q106b5th", "q106b6th")
# Only need to look at first 4 moves
x %>%
  filter(q106b1st==99 | q106b2nd==99 | q106b3rd==99 | q106b4th==99) %>%
  select("id","BornYear","q97ay","q106b1st","q106b2nd","q106b3rd", "q106b4th", "q106b5th", "q106b6th")
# All of them were born in Australia, so moving out of family home is first reported move (possibly not first move)
# q101a: was the respondent born in Australia
x %>%
  filter(q106b1st==99 | q106b2nd==99 | q106b3rd==99 | q106b4th==99) %>%
  select(q101a)
# Two of them moved since then
# q104b: Is this the town or rural area where she is living now
# q104c: Since that time, have you always lived in [town/rural area]
# These questions were unclear to respondents
x %>%
  filter(q106b1st==99 | q106b2nd==99 | q106b3rd==99 | q106b4th==99) %>%
  select("id","BornYear","q97ay","q106b1st","q106b2nd","q106b3rd", "q106b4th", "q106b5th", "q106b6th", 
         q98a, q104a1, q104b, q104c, q106a1st)
# id 785 moved out of family home to same state and then claims to have left that placeand moved back but did not.
# id 694 moved out of family home to same state and then claims to have left that place but did not.
# id 54 moved out of family home in NSW at age 21 to Qld, 
#  moved back to NSW in an unknown year, and moved again to Qld in an unknown year.
#  She was 21 at the first move, so her later data are less important.
x %>%
  filter(q106b1st==99 | q106b2nd==99 | q106b3rd==99 | q106b4th==99) %>%
  select("id","BornYear","q97ay","q106b1st","q106b2nd","q106b3rd", "q106b4th", "q106b5th", "q106b6th", 
         q98a, q104a1, q106a1st, q106a2nd, q106a3rd, q106a4th, q106a5th, state)

# # The following line of code is for testing and relies on variables created later
# x[row.names(x)==326, 
#   c("BornYear","q97ay","q106b1st","q106b2nd","q106b3rd", "q106b4th", "q106b5th", "q106b6th",
#     "HomeFam","Home1","Home2","Home3","Home4","Home5","Home6")]

# If moved to same state in unknown year, replace the 99s with NA (date and location)
# These moves were to new states
x$q106b1st[x$id==54] = NA
x$q106b2nd[x$id==54] = NA
x$q106a1st[x$id==54] = NA
x$q106a2nd[x$id==54] = NA
# These moves were within the same state
x$q106b3rd[x$id==54] = NA
x$q106b4th[x$id==54] = NA
x$q106a3rd[x$id==54] = NA
x$q106a4th[x$id==54] = NA

# id 694 stayed in WA all her life (at least after leaving family home)
x$q106b3rd[x$id==694] = NA
x$q106b4th[x$id==694] = NA
x$q106b5th[x$id==694] = NA
x$q106a3rd[x$id==694] = NA
x$q106a4th[x$id==694] = NA
x$q106a5th[x$id==694] = NA

# id 785 stayed in Qld all her life (at least after leaving family home)
x$q106b2nd[x$id==785] = NA
x$q106a2nd[x$id==785] = NA

# One person (id 240) who was born in Australia
# reported moving within WA after leaving home
# but gave the date as five years before leaving home.
# Because the move is in-state, it does not matter for our purposes,
# so I just made the date of the move 1983 instead of 1973.
# This has no effect on analysis.
x %>% filter(q97ay > q106b1st) %>%
  select("id", "q97ay", "q106b1st", "q106b2nd", q98a, "q106a1st", "q106a2nd", "q101a")
x$q106b1st[x$id==240] = 83
# Another person (id 460) reported making their 3rd move in 1966 and their 4th in 1965.
# The 3rd move is to ACT, and the 4th is to NSW.
# I would assume that the moves are in the right order because
# every subsequent move (2 more) is within NSW.
# It does not matter much because the person was born in 1941 and would be at least 23
# by the time of the 3rd move, 
# so I just switched the dates.
x[x$q106b2nd>x$q106b3rd & !is.na(x$q106b2nd) & !is.na(x$q106b3rd), 
  c("id", "q97ay", "q106b1st", "q106b2nd", "q106b3rd", "q106b4th", "q106b5th", "q106b6th", 
    "q106a1st", "q106a2nd", "q106a3rd", "q106a4th", "q106a5th", "q106a6th")]
x$q106b2nd[x$id==460] = 65
x$q106b3rd[x$id==460] = 66


# # Check if dates are in order
# sum(x$q97ay > x$q106b1st, na.rm=T)
# sum(x$q106b1st > x$q106b2nd, na.rm=T)
# sum(x$q106b2nd > x$q106b3rd, na.rm=T)
# sum(x$q106b3rd > x$q106b4th, na.rm=T)
# sum(x$q106b4th > x$q106b5th, na.rm=T)
# sum(x$q106b5th > x$q106b6th, na.rm=T)
# sum(x$q106b6th > x$q106b7th, na.rm=T)
# sum(x$q106b7th > x$q106b8th, na.rm=T)
# sum(x$q106b8th > x$q106b9th, na.rm=T)
# sum(x$q106b9th > x$q106b10h, na.rm=T)
# sum(x$q106b10h > x$q106b11h, na.rm=T)
# sum(x$q106b11h > x$q106b12h, na.rm=T)
# sum(x$q106b12h > x$q106b13h, na.rm=T)



####################
# Create variables with consistent names to hold move dates

# Home0 is overseas or family home.
# Home.5 is for people born overseas and is the earlier of arriving in A or leaving home.
# Home1 is leaving home for people born in A and 
## the later of arriving in A or leaving home for people born overseas.

# If arrived at same time as left family home, drop one.
# idmvhm = (x %>% filter(q1d2==q97ay &q1d1==q97am))$id
# x = x %>% mutate(MoveToAWhenLeftFam = ifelse(q1d2==q97ay & q1d1==q97am, 1, 0))
x = x %>% mutate(MoveToAWhenLeftFam = ifelse(q1d2!=q97ay | q1d1!=q97am | is.na(q1d2), 0, 1))
# table(x$HomeFam[x$MoveToAWhenLeftFam==1])

# Assign Years of moves
# Move 0 is birth
x$Move0Year = x$BornYear
# Move .5 is coming to Australia if before leaving family home
# If moved to Australia after leaving family home, 
# that will show up as one of the other moves.
x$Move.5Year = ifelse((x$q1c==1 | x$MoveToAWhenLeftFam==1) | !(x$q97ay==98 | x$q101b==1), NA, x$q1d2) + 1900
# Move 1 is leaving family home
x$Move1Year = ifelse(x$q97ay!=98, x$q97ay, NA) + 1900
x$Move2Year = x$q106b1st + 1900
x$Move3Year = x$q106b2nd + 1900
x$Move4Year = x$q106b3rd + 1900
x$Move5Year = x$q106b4th + 1900
x$Move6Year = x$q106b5th + 1900
x$Move7Year = x$q106b6th + 1900
x$Move8Year = x$q106b7th + 1900
x$Move9Year = x$q106b8th + 1900
x$Move10Year = x$q106b9th + 1900
x$Move11Year = x$q106b10h + 1900
x$Move12Year = x$q106b11h + 1900
x$Move13Year = x$q106b12h + 1900

# Assign Months of moves without imputing unknown months
# Move 0 is birth
x$Move0Month = x$BornMonth
# Move .5 is coming to Australia if before leaving family home
# If moved to Australia after leaving family home, 
# that will show up as one of the other moves.
x$Move.5Month = ifelse((x$q1c==1 | x$MoveToAWhenLeftFam==1) | !(x$q97am==98 | x$q101b==1), NA, x$q1d1)
# Move 1 is leaving family home
x$Move1Month = ifelse(x$q97am!=98, x$q97am, NA)
# Home1 is leaving home for people born in A and 
## the later of arriving in A or leaving home for people born overseas.
x$Move1Month = 
  with(x, 
       ifelse(q97am==98, NA, # Never left home (no NAs)
              ifelse(q1c==1 | MoveToAWhenLeftFam==1, q97am, # Born in Australia (no NAs)
                     ifelse(q101b==1, # Left after coming to Australia (NA if born in A or never left home)
                            q97am,  
                            q1d1))))



####################
# Destinations of moves

# Create state of residence right before first left family home
# x = merge(x, places, by.x="q98a", by.y="placeCode", all.x=T)
x = left_join(x, places, by=c("q98a"="placeCode"))%>%
  rename(HomeFam = StateCodeMerge)
# For those currently in family home, use current state
# x = merge(x, states, by="state", all.x=T)
x = left_join(x, states, by="state")
x$HomeFam[x$q97am==98] = x$StateCodeMerge[x$q97am==98]
x$StateCodeMerge = NULL

# id 22 and 2544 moved out of family home in same month as arriving in Australia,
# so set their family home as overseas
x$HomeFam[x$id==22] = "Overseas"
x$HomeFam[x$id==2544] = "Overseas"

# Problem: was there anyone born in Australia who moved out and then back with family?
# Yes. 6 people.
table(x$HomeFam[x$q1c==1])
# I have little information on when these people left Australia
# and treat them like people born overseas who move to A with family

# If born overseas, that is Home0
# If born in Australia, Home0 is family home (may not be birthplace)
# These two are equivalent
x$Home0 = ifelse(x$BornInA==0, "Overseas", x$HomeFam)
# x$Home0 = ifelse(!is.na(x$Move.5Year), "Overseas", x$HomeFam)

# Homes .5 is for people born overseas and is the earlier of arriving in A or leaving home.
x$Home.5 = 
  with(x, 
       ifelse(q1c==1, NA, # born in Austalia
              ifelse(q97am==98 | q101b==1, #Never left home or left after coming to Australia
                     HomeFam, # If left after coming to A, then when came to A, came to family home
                     "Overseas"))) #Left before coming to Australia
# Home 1 can be many things, but the location is always given by q104a1 if there is a location
# Similarly, Home 2 location is always given by q106a1st, etc.
# Create state of residence after left family home and arrived in Australia
# (or after arrival in Australia)
x = left_join(x, places, by=c("q104a1"="placeCode")) %>%
  rename(Home1 = StateCodeMerge)
x = left_join(x, places, by=c("q106a1st"="placeCode")) %>%
  rename(Home2 = StateCodeMerge)
x = left_join(x, places, by=c("q106a2nd"="placeCode")) %>%
  rename(Home3 = StateCodeMerge)
x = left_join(x, places, by=c("q106a3rd"="placeCode")) %>%
  rename(Home4 = StateCodeMerge)
x = left_join(x, places, by=c("q106a4th"="placeCode")) %>%
  rename(Home5 = StateCodeMerge)
x = left_join(x, places, by=c("q106a5th"="placeCode")) %>%
  rename(Home6 = StateCodeMerge)
x = left_join(x, places, by=c("q106a6th"="placeCode")) %>%
  rename(Home7 = StateCodeMerge)
x = left_join(x, places, by=c("q106a7th"="placeCode")) %>%
  rename(Home8 = StateCodeMerge)
x = left_join(x, places, by=c("q106a8th"="placeCode")) %>%
  rename(Home9 = StateCodeMerge)
x = left_join(x, places, by=c("q106a9th"="placeCode")) %>%
  rename(Home10 = StateCodeMerge)
x = left_join(x, places, by=c("q106a10h"="placeCode")) %>%
  rename(Home11 = StateCodeMerge)
x = left_join(x, places, by=c("q106a11h"="placeCode")) %>%
  rename(Home12 = StateCodeMerge)
x = left_join(x, places, by=c("q106a12h"="placeCode")) %>%
  rename(Home13 = StateCodeMerge)




####################
# Reshape the move data to long format

fMoveYear = 
  x %>%
  select(id,Move0Year:Move13Year) %>%
  gather(HomeNum, HomeStartYear, Move0Year:Move13Year, na.rm=T) %>%
  mutate(HomeNum = as.numeric(gsub("Year","", gsub("Move","", HomeNum))))
fMoveMonth = 
  x %>% 
  select(id,Move0Month:Move1Month) %>%
  gather(HomeNum, HomeStartMonth, Move0Month:Move1Month, na.rm=T) %>%
  mutate(HomeNum = as.numeric(gsub("Month","", gsub("Move","", HomeNum))))
fHomes = 
  x %>% 
  select(id,Home0:Home13) %>%
  gather(HomeNum, StateCode, Home0:Home13, na.rm=T) %>%
  mutate(HomeNum = as.numeric(gsub("Home", "", HomeNum)))
fMoves = 
  full_join(fMoveYear, fMoveMonth) %>% 
  left_join(fHomes) %>% 
  arrange(id,HomeNum) %>%
  left_join(x %>% select(id, q1d1, q1d2, MoveToAWhenLeftFam)) 
fMoves = 
  fMoves %>%
  group_by(id) %>%
  mutate(Match = ifelse(!(HomeNum>1 & 1900+q1d2==HomeStartYear & MoveToAWhenLeftFam==0) | is.na(q1d2), 0, 1),
         MinMatch = ifelse(length(HomeNum[Match==1])>0, min(HomeNum[Match==1]), 0))
fMoves$HomeStartMonth[fMoves$HomeNum==fMoves$MinMatch & fMoves$Match==1] = 
  fMoves$q1d1[fMoves$HomeNum==fMoves$MinMatch & fMoves$Match==1] 
# There are repeated idXyearXmonth moves


fMoves$HomeStartMonth[is.na(fMoves$HomeStartMonth)] = 99
fMoves$ImputedMonth = ifelse(fMoves$HomeStartMonth==99, 1, 0)



# Look at moves into A after move out of fam home
# PRoblem cases
Probs = fMoves %>%
  filter(HomeNum>1 & 1900+q1d2==HomeStartYear) %>% arrange(id, HomeNum)
# Usually only one case where move into A matches year
nrow(Probs)
length(unique(Probs$id))
# Luckily, the one person with a missing month has only one match


# Get data on number of moves in each year for each id
# and number with unknown month of moves
fMoves = 
  fMoves %>%
  group_by(id, HomeStartYear) %>%
  mutate(nYearStates = length(unique(StateCode)),
         nYearMoves = length(id),
         nYearUnknownMonths = sum(HomeStartMonth==99),
         nYearKnownMonths = sum(HomeStartMonth!=99))%>%
  arrange(id,HomeNum) %>%
  group_by(id) %>%
  mutate(RepeatYear=ifelse(HomeStartYear!=lag(HomeStartYear) | is.na(lag(HomeStartYear)),0,1),
         PreviousState = lag(StateCode))


####################
# Indicate moves that are to the same states as all other moves that year 
# and state of residence before that year (MoveUnimportant)
# Leave them in until later in case they are needed for matching to relationships
fMovesFirstTimeThisYear = 
  fMoves %>% 
  group_by(id, HomeStartYear) %>%
  mutate(FirstStateThisYear = StateCode[1],
            LastStateLastYear = PreviousState[1])
fMoves = 
  fMoves %>%
  left_join(fMovesFirstTimeThisYear)
fMoves = 
  fMoves %>%
  mutate(MoveUnimportant = 
           ifelse(nYearStates==1 & HomeNum!=0 & 
                  (FirstStateThisYear==LastStateLastYear | is.na(LastStateLastYear)), 
                  1, 0))

# # Repeated months
# fMoves %>%
#   group_by(id, HomeStartYear, HomeStartMonth) %>%
#   filter(length(id)>1)


####################
# Collect dates moved in with romantic partner in long format
# Will be used to correct move dates in next section

# table(x$q29bm1st, x$q29a1st)
# # 12 women do not remember date of marriage for first relationship.
# # All 12 report starting living with partner immediately after marriage.
# with(x %>% filter(q29a1st%in%c(1,2)), 
#      table(q29bm1st, q29cm1st))
# with(x %>% filter(q29a1st%in%c(1,2)), 
#      table(q29by1st, q29cy1st))

fMovedInYear = 
  x %>%
  select(id, q29by1st, q29by2nd, q29by3rd, q29by4th, q29by5th, q29by6th) %>%
  gather(RelationshipNum, MovedInYear, q29by1st:q29by6th, na.rm=T) %>%
  mutate(RelationshipNum = as.integer(substr(RelationshipNum, 6,6)))
fMovedInMonth = 
  x %>%
  select(id, q29bm1st, q29bm2nd, q29bm3rd, q29bm4th, q29bm5th, q29bm6th) %>%
  gather(RelationshipNum, MovedInMonth, q29bm1st:q29bm6th, na.rm=T) %>%
  mutate(RelationshipNum = as.integer(substr(RelationshipNum, 6,6)))
fMarriageYear = 
  x %>%
  select(id, q29cy1st, q29cy2nd, q29cy3rd, q29cy4th, q29cy5th, q29cy6th) %>%
  gather(RelationshipNum, MarriageYear, q29cy1st:q29cy6th, na.rm=T) %>%
  mutate(RelationshipNum = as.integer(substr(RelationshipNum, 6,6)))
fMarriageMonth = 
  x %>%
  select(id, q29cm1st, q29cm2nd, q29cm3rd, q29cm4th, q29cm5th, q29cm6th) %>%
  gather(RelationshipNum, MarriageMonth, q29cm1st:q29cm6th, na.rm=T) %>%
  mutate(RelationshipNum = as.integer(substr(RelationshipNum, 6,6)))
fRelationshipType = 
  x %>%
  select(id, q29a1st, q29a2nd, q29a3rd, q29a4th, q29a5th, q29a6th) %>%
  gather(RelationshipNum, RelationshipType, q29a1st:q29a6th, na.rm=T) %>%
  mutate(RelationshipNum = as.integer(substr(RelationshipNum, 5,5)))
fRelationshipOver =
  x %>%
  select(id, q40a1st, q40a2nd, q40a3rd, q40a4th, q40a5th, q40a6th) %>%
  gather(RelationshipNum, NotOver, q40a1st:q40a6th, na.rm=T) %>%
  mutate(RelationshipNum = as.integer(substr(RelationshipNum, 5,5)),
         NotOver = 2-NotOver)

fMovedIn = 
  fRelationshipType %>%
  left_join(fMovedInYear, by=c("id", "RelationshipNum")) %>%
  left_join(fMovedInMonth, by=c("id", "RelationshipNum")) %>%
  left_join(fMarriageYear, by=c("id", "RelationshipNum")) %>%
  left_join(fMarriageMonth, by=c("id", "RelationshipNum")) %>%
  left_join(fRelationshipOver, by=c("id", "RelationshipNum"))
# One woman who did not remember her marriage date has a spouse who did 
# (See the MaleFemaleCombine file)
fMovedIn$MarriageMonth[fMovedIn$id==1995] = 12
fMovedInCorrected = fMovedIn %>%
  mutate(MovedInYear = ifelse(MovedInYear==98, MarriageYear, MovedInYear),
         MovedInMonth = ifelse(MovedInMonth==98, MarriageMonth, MovedInMonth),
         MovedInMonth = ifelse(MovedInMonth==99 & MovedInYear==MarriageYear, MarriageMonth, MovedInMonth), # No effect
         MovedInYear = MovedInYear+1900,
         MarriageYear = MarriageYear+1900,
         RelationshipType2 = ifelse(RelationshipType %in% c(1,2), "Married Immediately", 
                                    ifelse(RelationshipType==3, "Married Later", "Not married")))
# # Why that line has no effect:
# ggplot(fMovedIn %>% filter(MovedInMonth==99 & MarriageMonth!=99),
#        aes(MovedInYear, MarriageYear))+
#   geom_abline(intercept=0, slope=1, color="grey50") +
#   geom_point()



################
# Match moves to relationship move-ins from the same year

CombinedMoves = 
  as.tibble(expand.grid(id=x$id, RelationshipNum=unique(fMovedIn$RelationshipNum), HomeNum=unique(fMoves$HomeNum)))
CombinedMoves = 
  CombinedMoves %>%
  left_join(fMoves) %>%
  left_join(fMovedInCorrected) %>%
  arrange(id, HomeNum, RelationshipNum)
# sum(CombinedMoves$HomeStartYear==CombinedMoves$MovedInYear, na.rm=T)
CombinedMoves =
  CombinedMoves %>% 
  filter(HomeStartYear==MovedInYear)
# filter(HomeStartYear==MovedInYear | HomeStartYear==MarriageYear)
# Problem: MovedInYear can equal MovedInYear from different relationship
RepeatedHomes = 
  CombinedMoves %>%
  group_by(id) %>%
  summarise(MultiMatch = length(unique(HomeNum))<length(HomeNum)) %>%
  filter(MultiMatch)
CombinedMoves %>% filter(id==1145)
# select(-HomeNumThisYear, -Quarter, -AnyMoveMonthKnownThisYr)
# id 1145 had two live-in relationships in 1985 and moved 4 times that year.
# all moves were to Qld
# Remembers moving in for first 1985 relationship in January
# Can drop the other three moves
# CombinedMoves$HomeStartMonth[CombinedMoves$id==1145 & CombinedMoves$RelationshipNum==2 & CombinedMoves$HomeNum==4] = 1
# CombinedMoves = CombinedMoves %>% filter(!(id==1145 & is.na(HomeStartMonth)))
# Replace the values for this person in both data sets
fMoves$HomeStartMonth[fMoves$id==1145 & fMoves$HomeNum==4] = 1
fMoves = fMoves %>% filter(!(id==1145 & HomeNum %in% 5:7))
CombinedMoves = CombinedMoves %>% filter(!(id==1145 & (HomeNum %in% 5:7 | RelationshipNum==2)))



# # Problem: MovedInYear and MarriageYear could both match HomeStartYear but with different months
# # This is not really a problem. Just use MovedInYear.
# sum(CombinedMoves$MovedInYear == CombinedMoves$MarriageYear & CombinedMoves$MovedInMonth != CombinedMoves$MarriageMonth, na.rm=T)
# idtemp = CombinedMoves$MovedInYear == CombinedMoves$MarriageYear & CombinedMoves$MovedInMonth != CombinedMoves$MarriageMonth & !is.na(CombinedMoves$MarriageYear)
# CombinedMoves[idtemp,] %>% select(-HomeNumThisYear, -Quarter, -AnyMoveMonthKnownThisYr, -BornDate, -BornMonth)

CombinedMoves$nYearMoves[is.na(CombinedMoves$nYearMoves)] = 0
CombinedMoves$HomeStartMonthMod = CombinedMoves$HomeStartMonth
CombinedMoves$ReplaceM = 
  (is.na(CombinedMoves$HomeStartMonth) | CombinedMoves$HomeStartMonth==99) & 
  CombinedMoves$nYearMoves==1 & 
  !is.na(CombinedMoves$MovedInMonth) & 
  CombinedMoves$MovedInMonth!=99
CombinedMoves$HomeStartMonthMod[CombinedMoves$ReplaceM] = 
  CombinedMoves$MovedInMonth[CombinedMoves$ReplaceM]
# This leaves 155 more unknown months
sum(is.na(CombinedMoves$HomeStartMonthMod) | CombinedMoves$HomeStartMonthMod==99)

# Problem: MovedInYear can match multiple homes
CombinedMoves$CouldReplaceM = (is.na(CombinedMoves$HomeStartMonth) | CombinedMoves$HomeStartMonth==99) & !is.na(CombinedMoves$MovedInMonth) & CombinedMoves$MovedInMonth!=99
CombinedMoves %>% filter(nYearMoves>1)
CombinedMoves %>% filter(nYearMoves-nYearUnknownMonths==1 & nYearUnknownMonths>0)
# How many people have only one missing month in a year with more than one month and a known move-in?
length(unique(CombinedMoves$id[CombinedMoves$nYearMoves-CombinedMoves$nYearUnknownMonths==1 & CombinedMoves$nYearUnknownMonths>0]))
# Most of these are only two-move years
CombinedMovesMultiHome =  
  CombinedMoves %>% 
  filter(nYearMoves-nYearUnknownMonths==1 & nYearUnknownMonths>0) %>%
  group_by(id, HomeStartYear) %>%
  mutate(
    InYearMonthMatch = max(ifelse(HomeStartMonth==MovedInMonth, 1, 0), na.rm=T),
    Replace2 = ifelse(is.na((HomeStartMonthMod) | HomeStartMonthMod==99) & InYearMonthMatch==0, 1, 0)
  )
CombinedMoves = 
  CombinedMoves %>%
  left_join(CombinedMovesMultiHome %>% ungroup %>% select(id, RelationshipNum, HomeNum, Replace2))

CombinedMoves$HomeStartMonthMod[CombinedMoves$Replace2==1 & !is.na(CombinedMoves$Replace2)] = 
  CombinedMoves$MovedInMonth[CombinedMoves$Replace2==1 & !is.na(CombinedMoves$Replace2)]

# There are 24 cases where the marriage month is known, but the person did not say she moved when she got married,
# so replacing the movement month with the marriage month is suspect
CombinedMoves$ReplaceMonthMarr = is.na(CombinedMoves$HomeStartMonthMod) & CombinedMoves$nYearMoves==1  & !is.na(CombinedMoves$MarriageMonth) & CombinedMoves$MarriageMonth!=99

fMovesFull = fMoves %>%
  left_join(CombinedMoves %>% select(id, HomeNum, HomeStartMonthMod)) %>%
  mutate(HomeStartMonthMod = ifelse(!is.na(HomeStartMonthMod), HomeStartMonthMod, HomeStartMonth),
         ImputedMonth = ifelse(is.na(HomeStartMonth) | HomeStartMonth!=99, 1, 0),
         HomeStartMonth = ifelse(!is.na(HomeStartMonth) & HomeStartMonth!=99, HomeStartMonth, HomeStartMonthMod))
  # select(id, HomeNum, HomeStartYear, HomeStartMonth, StateCode)

# 271 months imputed from romantic relationships
sum((fMoves$HomeStartMonth==99 & !is.na(fMoves$HomeStartMonth)) | is.na(fMoves$HomeStartMonth)) -
sum((fMovesFull$HomeStartMonth==99 & !is.na(fMovesFull$HomeStartMonth)) | is.na(fMovesFull$HomeStartMonth))


##################

# Have to recalculate known/unknown months because some were replaced
fMovesSmall = 
  fMovesFull %>%
  filter(MoveUnimportant==0) %>%
  group_by(id, HomeStartYear) %>%
  arrange(id, HomeNum) %>%
  mutate(nYearMoves = length(id),
         HomeNumInYear = row_number(),
         nYearUnknownMonths = sum(HomeStartMonth==99),
         nYearKnownMonths = sum(HomeStartMonth!=99),
         LastMoveInYearWithMonthKnown = ifelse(nYearKnownMonths>0, max(HomeNum[HomeStartMonth!=99]), NA),
         LastHomeNumInYearWithMonthKnown = ifelse(nYearKnownMonths>0, max(HomeNumInYear[HomeStartMonth!=99]), NA),
         IsMoveAfterLastKnown = ifelse(HomeNum>LastMoveInYearWithMonthKnown | is.na(LastMoveInYearWithMonthKnown), 1, 0),
         IsLastKnown = ifelse(HomeNum==LastMoveInYearWithMonthKnown | (is.na(LastMoveInYearWithMonthKnown) & HomeNumInYear==1), 1, 0),
         LastKnownMonth = ifelse(nYearKnownMonths>0, HomeStartMonth[HomeNum==max(HomeNum[HomeStartMonth!=99])], 0))
# 3 of the unknown months are before the last known month:
sum(fMovesSmall$HomeNum<fMovesSmall$LastMoveInYearWithMonthKnown & fMovesSmall$HomeStartMonth==99, na.rm=T)
# These people are the problem
# Luckily, every move in an unknown month that is followed by a move in a known month in the same year
# is to overseas from overseas to A
fMovesSmall %>% filter(id%in%c(442, 837, 2321))
fMovesSmall %>% filter(id==2321)
# This is just a more complicated version of the MoveUnimportant filter
# These moves also do not matter, so drop them
# After dropping them, the HomeNumInYear and everything based on it have to be recalculated
fMovesSmall = 
  fMovesFull %>%
  filter(MoveUnimportant==0) %>%
  filter(!(id %in% c(442, 837, 2321) & HomeNum==.5)) %>%
  group_by(id, HomeStartYear) %>%
  arrange(id, HomeNum) %>%
  mutate(nYearMoves = length(id),
         HomeNumInYear = row_number(),
         nYearUnknownMonths = sum(HomeStartMonth==99),
         nYearKnownMonths = sum(HomeStartMonth!=99),
         LastMoveInYearWithMonthKnown = ifelse(nYearKnownMonths>0, max(HomeNum[HomeStartMonth!=99]), NA),
         LastHomeNumInYearWithMonthKnown = ifelse(nYearKnownMonths>0, max(HomeNumInYear[HomeStartMonth!=99]), NA),
         IsMoveAfterLastKnown = ifelse(HomeNum>LastMoveInYearWithMonthKnown | is.na(LastMoveInYearWithMonthKnown), 1, 0),
         IsLastKnown = ifelse(HomeNum==LastMoveInYearWithMonthKnown | (is.na(LastMoveInYearWithMonthKnown) & HomeNumInYear==1), 1, 0),
         LastKnownMonth = ifelse(nYearKnownMonths>0, HomeStartMonth[HomeNum==max(HomeNum[HomeStartMonth!=99])], 0))
# Now none of the unknown months are before the last known month:
sum(fMovesSmall$HomeNum<fMovesSmall$LastMoveInYearWithMonthKnown & fMovesSmall$HomeStartMonth==99, na.rm=T)

# Spread missing months over the remainder of the year
fMovesSmall =
fMovesSmall %>% 
  mutate(MovesSinceLastKnown = ifelse(is.na(LastHomeNumInYearWithMonthKnown), HomeNumInYear, HomeNumInYear-LastHomeNumInYearWithMonthKnown),
         MonthsPerUnknownMove = ifelse(nYearUnknownMonths>0, (13-LastKnownMonth)/nYearUnknownMonths, 0),
         MonthsSinceLastKnown = round(MonthsPerUnknownMove*MovesSinceLastKnown-.5*MonthsPerUnknownMove),
         HomeStartMonth = ifelse(HomeStartMonth!=99, HomeStartMonth,LastKnownMonth+MonthsSinceLastKnown))



#############

# Impute the rest of the months
# Add survey date as final final date
fMoveYearFinish = 
  fMovesSmall %>% 
  select(id, HomeNum, HomeStartYear) %>%
  group_by(id) %>%
  arrange(id, HomeNum) %>%
  mutate(HomeNum = lag(HomeNum)) %>%
  rename(HomeFinishYear=HomeStartYear) %>%
  filter(!is.na(HomeNum))
fMoveMonthFinish = 
  fMovesSmall %>% ungroup %>% 
  select(id, HomeNum, HomeStartMonth) %>%
  group_by(id) %>%
  arrange(id, HomeNum) %>%
  mutate(HomeNum = lag(HomeNum)) %>%
  rename(HomeFinishMonth=HomeStartMonth) %>%
  filter(!is.na(HomeNum))

fMovesSmall = 
  fMovesSmall %>%
  left_join(fMoveMonthFinish, by=c("id","HomeNum")) %>%
  left_join(fMoveYearFinish, by=c("id","HomeNum")) %>%
  # select(id, HomeNum, HomeStartYear, HomeStartMonth, HomeFinishYear, HomeFinishMonth, StateCode, ImputedMonth) %>%
  mutate(HomeFinishYear = ifelse(HomeFinishMonth-1>0, HomeFinishYear, HomeFinishYear-1),
         HomeFinishMonth = ifelse(HomeFinishMonth-1>0, HomeFinishMonth-1, 12))

# Add a date as end of final move after 1985
# The timing is not important, as I do not use data after 1985
fMovesSmall = 
  fMovesSmall %>%
  group_by(id) %>%
  mutate(FinalMove = HomeNum==max(HomeNum),
         HomeFinishYear = ifelse(FinalMove, 1987 , HomeFinishYear),
         HomeFinishMonth = ifelse(FinalMove, 12 , HomeFinishMonth),
         StartDate = ymd(paste(HomeStartYear, HomeStartMonth,"01",sep="-")),
         FinishDate = ymd(paste(HomeFinishYear, HomeFinishMonth,"01",sep="-")))

# # No overlapping periods:
# fMovesSmall %>%
#   group_by(id) %>%
#   arrange(id, StartDate, HomeNum) %>%
#   filter(FinishDate>=lead(StartDate))
# fMovesTest = 
#   fMovesSmall %>%
#   filter(id==1) %>%
#   select(id, HomeNum, StartDate, FinishDate) %>%
#   group_by(id, HomeNum) %>% 
#   mutate(Date = list(seq.Date(StartDate, FinishDate, by = "month"))) %>%
#   unnest(Date) %>%
#   ungroup()
# 
# dt = tibble(
#   StartDate = ymd("2010-07-01"),
#   FinishDate = ymd("2011-02-01")
# )
# dtpanel = 
#   dt %>%
#   dplyr::mutate(Date = list(seq.Date(StartDate, FinishDate, by = "month"))) %>%
#   dplyr::mutate(Date = list(seq.Date(StartDate, FinishDate, by = "month"))) %>%
#   unnest(Date) 
# with(dt, list(seq.Date(StartDate, FinishDate, by = "month")))

fMovesPanel = 
  fMovesSmall %>%
  # filter(id==1) %>%
  # unique() %>%
# filter(!is.na(StartDate) & !is.na(FinishDate)) %>%
  group_by(id, HomeNum) %>% 
  mutate(Date = list(seq(StartDate, FinishDate, by = "month"))) %>%
  unnest(Date) %>%
  ungroup() %>%
  select(id, Date, StateCode, HomeStartYear, HomeStartMonth, HomeFinishYear, HomeFinishMonth, ImputedMonth)

# fMovesPanel$HomeNum = as.integer(ifelse(fMovesPanel$HomeNum==0, 0, ifelse(fMovesPanel$HomeNum==.5, fMovesPanel$HomeNum+.5, fMovesPanel$HomeNum+1)))
fMovesPanel$HomeStartMonth = as.integer(fMovesPanel$HomeStartMonth)
fMovesPanel$HomeStartYear = as.integer(fMovesPanel$HomeStartYear)
fMovesPanel$HomeFinishMonth = as.integer(fMovesPanel$HomeFinishMonth)
fMovesPanel$HomeFinishYear = as.integer(fMovesPanel$HomeFinishYear)
fMovesPanel$ImputedMonth = as.integer(fMovesPanel$ImputedMonth)



#############
# Contraceptives


# x[1:20,c("id",
#      "q76ay1st", "q76cy1st", "q76am1st", "q76cm1st", "q76b11st", "q76b21st",
#      "q76ay2nd", "q76cy2nd", "q76am2nd", "q76cm2nd", "q76b12nd", "q76b22nd",
#      "q76ay3rd", "q76cy3rd", "q76am3rd", "q76cm3rd", "q76b13rd", "q76b23rd",
#      "q76ay4th", "q76cy4th", "q76am4th", "q76cm4th", "q76b14th", "q76b24th")]
# 
# x[1:40,c("id",
#          "q76ay1st", "q76cy1st", "q76am1st", "q76cm1st", "q76b11st", "q76b21st",
#          "q76ay2nd", "q76cy2nd", "q76am2nd", "q76cm2nd", "q76b12nd", "q76b22nd")]

# sum(is.na(filter(x, q76b11st==1 | q76b21st==1)$q76ay1st))
# sum(is.na(filter(x, q76b12nd==1 | q76b22nd==1)$q76ay2nd))
# sum(is.na(filter(x, q76b13rd==1 | q76b23rd==1)$q76ay3rd))

# q74b only asked if q74a == 2
# q74b used to clarify q74a
x$AnyContraceptive = ifelse(x$q74a==1,1,0)
x$AnyContraceptive[x$q74b == 1] = 1
x$AnyContraceptive[x$q74a == 8] = NA
# x$AnyContraceptive[x$q74b == 8] = NA

# Ever used Pill
# Start with 0 for everyone who reports on contraceptive use
# and NA for everyone who refused to answer
x$EverUsedPill = NA
x$EverUsedPill[!is.na(x$AnyContraceptive)] = 0
# x$EverUsedPill = ifelse(x$q76b11st==1, 1, 0)
# Change to 1 if report pill use at any time
x$EverUsedPill[x$q76b11st==1 & !is.na(x$q76b11st)] = 1
x$EverUsedPill[x$q76b21st==1 & !is.na(x$q76b21st)] = 1
x$EverUsedPill[x$q76b12nd==1 & !is.na(x$q76b12nd)] = 1
x$EverUsedPill[x$q76b22nd==1 & !is.na(x$q76b22nd)] = 1
x$EverUsedPill[x$q76b13rd==1 & !is.na(x$q76b13rd)] = 1
x$EverUsedPill[x$q76b23rd==1 & !is.na(x$q76b23rd)] = 1
x$EverUsedPill[x$q76b14th==1 & !is.na(x$q76b14th)] = 1
x$EverUsedPill[x$q76b24th==1 & !is.na(x$q76b24th)] = 1
x$EverUsedPill[x$q76b15th==1 & !is.na(x$q76b11st)] = 1
x$EverUsedPill[x$q76b25th==1 & !is.na(x$q76b24th)] = 1
x$EverUsedPill[x$q76b16th==1 & !is.na(x$q76b11st)] = 1
x$EverUsedPill[x$q76b26th==1 & !is.na(x$q76b24th)] = 1
x$EverUsedPill[x$q76b17th==1 & !is.na(x$q76b11st)] = 1
x$EverUsedPill[x$q76b27th==1 & !is.na(x$q76b24th)] = 1
x$EverUsedPill[x$q76b18th==1 & !is.na(x$q76b11st)] = 1
x$EverUsedPill[x$q76b28th==1 & !is.na(x$q76b24th)] = 1
x$EverUsedPill[x$q76b19th==1 & !is.na(x$q76b11st)] = 1
x$EverUsedPill[x$q76b29th==1 & !is.na(x$q76b24th)] = 1
# x$EverUsedPill[x$q76b110h==1] = 1
# x$EverUsedPill[x$q76b210h==1] = 1
# x$EverUsedPill[x$q76b111h==1] = 1
# x$EverUsedPill[x$q76b211h==1] = 1
# x$EverUsedPill[x$q76b112h==1] = 1
# x$EverUsedPill[x$q76b212h==1] = 1
## Not needed because question type of contraceptive used
## only asked to those who had a 1 on either q74a or q74b:
# x$EverUsedPill[x$AnyContraceptive==0] = 0

# Ever used condom
# Start with 0 for everyone who reports on contraceptive use
# and NA for everyone who refused to answer
x$EverUsedCondom = NA
x$EverUsedCondom[!is.na(x$AnyContraceptive)] = 0
# x$EverUsedCondom = ifelse(x$q76b11st==1, 1, 0)
# Change to 1 if report condom use at any time
x$EverUsedCondom[x$q76b11st==4 & !is.na(x$q76b11st)] = 1
x$EverUsedCondom[x$q76b21st==4 & !is.na(x$q76b21st)] = 1
x$EverUsedCondom[x$q76b12nd==4 & !is.na(x$q76b12nd)] = 1
x$EverUsedCondom[x$q76b22nd==4 & !is.na(x$q76b22nd)] = 1
x$EverUsedCondom[x$q76b13rd==4 & !is.na(x$q76b13rd)] = 1
x$EverUsedCondom[x$q76b23rd==4 & !is.na(x$q76b23rd)] = 1
x$EverUsedCondom[x$q76b14th==4 & !is.na(x$q76b14th)] = 1
x$EverUsedCondom[x$q76b24th==4 & !is.na(x$q76b24th)] = 1
x$EverUsedCondom[x$q76b15th==4 & !is.na(x$q76b11st)] = 1
x$EverUsedCondom[x$q76b25th==4 & !is.na(x$q76b24th)] = 1
x$EverUsedCondom[x$q76b16th==4 & !is.na(x$q76b11st)] = 1
x$EverUsedCondom[x$q76b26th==4 & !is.na(x$q76b24th)] = 1
x$EverUsedCondom[x$q76b17th==4 & !is.na(x$q76b11st)] = 1
x$EverUsedCondom[x$q76b27th==4 & !is.na(x$q76b24th)] = 1
x$EverUsedCondom[x$q76b18th==4 & !is.na(x$q76b11st)] = 1
x$EverUsedCondom[x$q76b28th==4 & !is.na(x$q76b24th)] = 1
x$EverUsedCondom[x$q76b19th==4 & !is.na(x$q76b11st)] = 1
x$EverUsedCondom[x$q76b29th==4 & !is.na(x$q76b24th)] = 1

# Ever used condom
# Start with 0 for everyone who reports on contraceptive use
# and NA for everyone who refused to answer
x$EverUsedIUD = NA
x$EverUsedIUD[!is.na(x$AnyContraceptive)] = 0
# x$EverUsedIUD = ifelse(x$q76b11st==1, 1, 0)
# Change to 1 if report IUD use at any time
x$EverUsedIUD[x$q76b11st==2 & !is.na(x$q76b11st)] = 1
x$EverUsedIUD[x$q76b21st==2 & !is.na(x$q76b21st)] = 1
x$EverUsedIUD[x$q76b12nd==2 & !is.na(x$q76b12nd)] = 1
x$EverUsedIUD[x$q76b22nd==2 & !is.na(x$q76b22nd)] = 1
x$EverUsedIUD[x$q76b13rd==2 & !is.na(x$q76b13rd)] = 1
x$EverUsedIUD[x$q76b23rd==2 & !is.na(x$q76b23rd)] = 1
x$EverUsedIUD[x$q76b14th==2 & !is.na(x$q76b14th)] = 1
x$EverUsedIUD[x$q76b24th==2 & !is.na(x$q76b24th)] = 1
x$EverUsedIUD[x$q76b15th==2 & !is.na(x$q76b11st)] = 1
x$EverUsedIUD[x$q76b25th==2 & !is.na(x$q76b24th)] = 1
x$EverUsedIUD[x$q76b16th==2 & !is.na(x$q76b11st)] = 1
x$EverUsedIUD[x$q76b26th==2 & !is.na(x$q76b24th)] = 1
x$EverUsedIUD[x$q76b17th==2 & !is.na(x$q76b11st)] = 1
x$EverUsedIUD[x$q76b27th==2 & !is.na(x$q76b24th)] = 1
x$EverUsedIUD[x$q76b18th==2 & !is.na(x$q76b11st)] = 1
x$EverUsedIUD[x$q76b28th==2 & !is.na(x$q76b24th)] = 1
x$EverUsedIUD[x$q76b19th==2 & !is.na(x$q76b11st)] = 1
x$EverUsedIUD[x$q76b29th==2 & !is.na(x$q76b24th)] = 1


ContraStartYear = 
  x %>%
  select(id, q76ay1st, q76ay2nd, q76ay3rd, q76ay4th, q76ay5th, q76ay6th, q76ay7th, q76ay8th, q76ay9th) %>%
  gather(ContraNum, ContraStartYear, q76ay1st:q76ay9th, na.rm=T) %>%
  mutate(ContraNum = as.integer(substr(ContraNum, 6,6)),
         ContraStartYear = ContraStartYear+1900)
ContraFinishYear = 
  x %>%
  select(id, q76cy1st, q76cy2nd, q76cy3rd, q76cy4th, q76cy5th, q76cy6th, q76cy7th, q76cy8th, q76cy9th) %>%
  gather(ContraNum, ContraFinishYear, q76cy1st:q76cy9th, na.rm=T) %>%
  mutate(ContraNum = as.integer(substr(ContraNum, 6,6)),
         ContraFinishYear = ContraFinishYear+1900) %>%
  group_by(id, ContraNum) %>%
  mutate(ContraFinishYear = ifelse(ContraNum==max(ContraNum) & is.na(ContraFinishYear), 1987, ContraFinishYear)) %>%
  ungroup()
  # mutate(ContraLast = ifelse(ContraNum==max(ContraNum), 1, 0))
  
ContraStartMonth = 
  x %>%
  select(id, q76am1st, q76am2nd, q76am3rd, q76am4th, q76am5th, q76am6th, q76am7th, q76am8th, q76am9th) %>%
  gather(ContraNum, ContraStartMonth, q76am1st:q76am9th, na.rm=T) %>%
  mutate(ContraNum = as.integer(substr(ContraNum, 6,6)))
ContraFinishMonth = 
  x %>%
  select(id, q76cm1st, q76cm2nd, q76cm3rd, q76cm4th, q76cm5th, q76cm6th, q76cm7th, q76cm8th, q76cm9th) %>%
  gather(ContraNum, ContraFinishMonth, q76cm1st:q76cm9th, na.rm=T) %>%
  mutate(ContraNum = as.integer(substr(ContraNum, 6,6))) %>%
  group_by(id, ContraNum) %>%
  mutate(ContraFinishMonth = ifelse(ContraNum==max(ContraNum) & is.na(ContraFinishMonth), 12, ContraFinishMonth)) %>%
  ungroup()
ContraType1 = 
  x %>%
  select(id, q76b11st, q76b12nd, q76b13rd, q76b14th, q76b15th, q76b16th, q76b17th, q76b18th, q76b19th) %>%
  gather(ContraNum, ContraType1, q76b11st:q76b19th, na.rm=T) %>%
  mutate(ContraNum = as.integer(substr(ContraNum, 6,6)))
ContraType2 = 
  x %>%
  select(id, q76b21st, q76b22nd, q76b23rd, q76b24th, q76b25th, q76b26th, q76b27th, q76b28th, q76b29th) %>%
  gather(ContraNum, ContraType2, q76b21st:q76b29th, na.rm=T) %>%
  mutate(ContraNum = as.integer(substr(ContraNum, 6,6)))
ContraUsedAgain = 
  x %>%
  select(id, q771st, q772nd, q773rd, q774th, q775th, q776th, q777th, q778th, q779th) %>%
  gather(ContraNum, ContraUsedAgain, q771st:q779th, na.rm=T) %>%
  mutate(ContraNum = as.integer(substr(ContraNum, 4,4)))

Contra = 
  full_join(ContraStartYear, ContraStartMonth, by=c("id", "ContraNum")) %>%
  full_join(ContraFinishYear, by=c("id", "ContraNum")) %>%
  full_join(ContraFinishMonth, by=c("id", "ContraNum")) %>%
  full_join(ContraType1, by=c("id", "ContraNum")) %>%
  full_join(ContraType2, by=c("id", "ContraNum")) %>%
  full_join(ContraUsedAgain, by=c("id", "ContraNum"))
  # left_join(x %>% select(id, SurveyYear, SurveyMonth), by="id")
# 88 means refusal to answer (probably)
# Drop women who refuse to provide year of use 
ContraDrop =
  Contra %>% 
  group_by(id) %>%
  filter(max(ContraStartYear==1988)==1) %>%
  arrange(id, ContraNum)
  # mutate(Any88 = max(ifelse(ContraStartYear==1988, 1,0)))
# sum(is.na(Contra$ContraType1[Contra$ContraStartMonth!=88]))
PillDrop = unique(ContraDrop$id)
Contra = Contra %>% filter(!(id %in% PillDrop))
# 98 means still using
Contra$ContraFinishYear[Contra$ContraFinishYear==1998 & !is.na(Contra$ContraFinishYear)] = 1987
Contra$ContraFinishMonth[Contra$ContraFinishMonth==98 & !is.na(Contra$ContraFinishMonth)] = 12
# 5 women who said they used some method refuse to report date of ContraNum==1
# Of the women who report the date, 3 report own sterilization as method,
#  and dates of stopping are then not reported
Contra$ContraFinishYear[Contra$ContraType1==10 & !is.na(Contra$ContraType1)] = 1987
Contra$ContraFinishMonth[Contra$ContraType1==10 & !is.na(Contra$ContraType1)] = 12
# If husband sterilized (ContraType1==11), asked if used again
Contra$ContraFinishYear[Contra$ContraType1==11 & !is.na(Contra$ContraType1) & Contra$ContraUsedAgain==2] = 1987
Contra$ContraFinishMonth[Contra$ContraType1==11 & !is.na(Contra$ContraType1 & Contra$ContraUsedAgain==2)] = 12
# # Check if any use periods are missing an ending
# Contra =
#   Contra %>%
#   mutate(MissingFinish = ifelse(is.na(ContraFinishYear) & !is.na(ContraStartYear),1,0),
#          MissingStart = ifelse(is.na(ContraStartYear) & !is.na(ContraFinishYear),1,0),
#          MissingFinishMonth = ifelse(is.na(ContraFinishMonth) & !is.na(ContraStartMonth),1,0),
#          MissingStartMonth = ifelse(is.na(ContraStartMonth) & !is.na(ContraFinishMonth),1,0))


# Contra = 
#   Contra %>%
#   group_by(id) %>%
#   arrange(id, ContraNum) %>% 
#   mutate(RepeatYear = ifelse(ContraStartYear!=lag(ContraFinishYear) | is.na(lag(ContraFinishYear)), 0, 1))
# # There are no cases of repeat years where the month is unknown
# idRepeats = unique(Contra$id[Contra$RepeatYear==1 & (Contra$ContraStartMonth==99 | lag(Contra$ContraFinishMonth)==99)])
# Contra %>% filter(id%in%idRepeats)

Contra = 
  Contra %>%
  group_by(id) %>%
  arrange(id, ContraNum) %>% 
  mutate(FinishInSameYear = ifelse(ContraFinishYear==ContraStartYear, 1, 0))
# # One person started using in an unknown month and then stopped that year (this seems to have depended on some code that changed)
# FinishAmbiguityID = unique(Contra$id[Contra$FinishInSameYear==1 & (Contra$ContraStartMonth==99 | Contra$ContraFinishMonth==99)])
# Contra %>% filter(id%in%FinishAmbiguityID)
# Contra %>% filter(id%in% unique(Contra$id[Contra$FinishInSameYear==1]))
# # It is not important because she starts in 1986, and I will drop 1986 data
# Contra$ContraStartMonth[Contra$id==1419 & Contra$ContraNum==2] = 1
# # No mixed up months
# sum(Contra$ContraStartMonth[Contra$FinishInSameYear==1]>Contra$ContraFinishMonth[Contra$FinishInSameYear==1])

# There are some mixed up dates
# Contra %>% filter(id %in% c(2065, 2140, 2390))
# id 2065 ContraNum 5: reported starting use at a date after the survey date
# Ending month imputed as survey month
# change starting month to earlier in year (4)
# Not important. No pill use and after date range I will use.
# Contra$ContraStartMonth[Contra$id==2065 & Contra$ContraNum==5] = 4
# id 2140: ContraNum 3 reports ending IUD use in sample month 
# ContraNum 4: reports starting pill use month after sample month (and still using)
# Eliminate the last Contra use case
# Contra = Contra %>% filter(!(id==2140 & ContraNum==4))
# id 2390: ContraNum 5 reports reports starting pill use month after sample month (and still using)
# Eliminate the last Contra use case
# Contra = Contra %>% filter(!(id==2390 & ContraNum==5))
# # There are no cases where 
# table(Contra$FinishInSameYear[Contra$ContraStartMonth==99])
# table(Contra$FinishInSameYear[Contra$ContraFinishMonth==99])

Contra$ContraStartMonth[Contra$ContraStartMonth==99] = AssumedStartMonth
Contra$ContraFinishMonth[Contra$ContraFinishMonth==99] = AssumedFinishMonth

Contra =
  Contra %>%
  mutate(Pill = ifelse(ContraType1==1 | ContraType2==1, 1, 0),
         IUD = ifelse(ContraType1==2 | ContraType2==2, 1, 0),
         Diaphragm = ifelse(ContraType1==4 | ContraType2==4, 1, 0),
         Condom = ifelse(ContraType1==5 | ContraType2==5, 1, 0))

Contra$StartDate = ymd(paste(Contra$ContraStartYear, Contra$ContraStartMonth,"01",sep="-"))
Contra$FinishDate = ymd(paste(Contra$ContraFinishYear, Contra$ContraFinishMonth,"01",sep="-"))
# Check for missing values
Contra %>% filter(is.na(FinishDate) | is.na(StartDate))
ContraDrop = unique((Contra %>% filter((is.na(FinishDate) | is.na(StartDate)) & Pill==1))$id)
# These do not need to be dropped because they are not pill use

# Check if any is out of order
Contra %>% filter(StartDate>FinishDate)
# Three ids cause trouble
# Contra %>% filter(id==986)
Contra = 
  Contra %>% 
  group_by(id) %>%
  arrange(id, ContraNum) %>%
  mutate(FinishBeforeStart = ifelse(!(ContraFinishYear==lead(ContraStartYear) & ContraFinishMonth>=lead(ContraStartMonth)) | is.na(lead(ContraStartMonth)), 1, 0),
         FinishAfterStart = ifelse(!(ContraFinishYear==lead(ContraStartYear) & ContraFinishMonth>lead(ContraStartMonth)) | is.na(lead(ContraStartMonth)), 0, 1))
# # No overlapping spells
# Contra %>% filter(FinishAfterStart==1)
# # Shared edges are a problem, though:
# Contra %>% filter(FinishBeforeStart==0)
Contra =
  Contra %>% 
  mutate(ContraFinishYear = ifelse(FinishBeforeStart==0 & ContraFinishMonth==1, ContraFinishYear-1, ContraFinishYear),
         ContraFinishMonth = ifelse(FinishBeforeStart==1, ContraFinishMonth, ifelse(ContraFinishMonth==1, 12, ContraFinishMonth-1)))
# Recalculate the date columns
Contra$StartDate = ymd(paste(Contra$ContraStartYear, Contra$ContraStartMonth,"01",sep="-"))
Contra$FinishDate = ymd(paste(Contra$ContraFinishYear, Contra$ContraFinishMonth,"01",sep="-"))

ContraPanel = 
  Contra %>% 
  # filter(!is.na(StartDate) & !is.na(FinishDate)) %>%
  group_by(id, ContraNum) %>% 
  mutate(Date = list(seq.Date(StartDate, FinishDate, by = "month"))) %>%
  unnest(Date) %>%
  select(-ContraUsedAgain, -ContraType1, -ContraType2, -FinishInSameYear, -StartDate, -FinishDate)


PillDateFirst = 
  Contra %>%
  filter(Pill==1) %>%
  group_by(id) %>%
  filter(ContraNum == min(ContraNum)) %>%
  select(id, StartDate) %>%
  rename(PillDateFirst = StartDate)
CondomDateFirst = 
  Contra %>%
  filter(Condom==1) %>%
  group_by(id) %>%
  filter(ContraNum == min(ContraNum)) %>%
  select(id, StartDate) %>%
  rename(CondomDateFirst = StartDate)
IUDDateFirst = 
  Contra %>%
  filter(IUD==1) %>%
  group_by(id) %>%
  filter(ContraNum == min(ContraNum)) %>%
  select(id, StartDate) %>%
  rename(IUDDateFirst = StartDate)

x = x %>% left_join(PillDateFirst)
x = x %>% left_join(CondomDateFirst)
x = x %>% left_join(IUDDateFirst)

x$AgeAtFirstPill =  zoo::as.yearmon(x$PillDateFirst)-zoo::as.yearmon(paste(x$BornYear,x$BornMonthString,sep="-"))
x$AgeAtFirstPillCensored = x$AgeAtFirstPill
x$AgeAtFirstPillCensored[is.na(x$AgeAtFirstPillCensored)] = 99
x$PillBefore21 = ifelse(x$AgeAtFirstPill>=21 | is.na(x$AgeAtFirstPill), 0, 1)
x$PillBefore20 = ifelse(x$AgeAtFirstPill>=20 | is.na(x$AgeAtFirstPill), 0, 1)
x$PillBefore19 = ifelse(x$AgeAtFirstPill>=19 | is.na(x$AgeAtFirstPill), 0, 1)
x$PillBefore18 = ifelse(x$AgeAtFirstPill>=18 | is.na(x$AgeAtFirstPill), 0, 1)

x$AgeAtFirstCondom =  zoo::as.yearmon(x$CondomDateFirst)-zoo::as.yearmon(paste(x$BornYear,x$BornMonthString,sep="-"))
x$AgeAtFirstCondomCensored = x$AgeAtFirstCondom
x$AgeAtFirstCondomCensored[is.na(x$AgeAtFirstCondomCensored)] = 99
x$CondomBefore21 = ifelse(x$AgeAtFirstPillCensored>=21, 0, 1)
x$CondomBefore18 = ifelse(x$AgeAtFirstPillCensored>=20, 0, 1)
x$CondomBefore18 = ifelse(x$AgeAtFirstPillCensored>=19, 0, 1)
x$CondomBefore18 = ifelse(x$AgeAtFirstPillCensored>=18, 0, 1)

x$AgeAtFirstIUD =  zoo::as.yearmon(x$IUDDateFirst)-zoo::as.yearmon(paste(x$BornYear,x$BornMonthString,sep="-"))
x$AgeAtFirstIUDCensored = x$AgeAtFirstIUD
x$AgeAtFirstIUDCensored[is.na(x$AgeAtFirstIUDCensored)] = 99
x$IUDBefore21 = ifelse(x$AgeAtFirstIUD>=21 | is.na(x$AgeAtFirstIUD), 0, 1)
x$IUDBefore21 = ifelse(x$AgeAtFirstIUD>=20 | is.na(x$AgeAtFirstIUD), 0, 1)
x$IUDBefore18 = ifelse(x$AgeAtFirstIUD>=19 | is.na(x$AgeAtFirstIUD), 0, 1)
x$IUDBefore18 = ifelse(x$AgeAtFirstIUD>=18 | is.na(x$AgeAtFirstIUD), 0, 1)


################
# Marriage

# 8 means "other", but no one has an 8
x$RelationshipType1 = ifelse(x$q29a1st<=3, 1, 0)
x$RelationshipType2 = ifelse(x$q29a2nd<=3, 1, 0)
x$RelationshipType3 = ifelse(x$q29a3rd<=3, 1, 0)
x$RelationshipType4 = ifelse(x$q29a4th<=3, 1, 0)
x$RelationshipType5 = ifelse(x$q29a5th<=3, 1, 0)
x$RelationshipType6 = ifelse(x$q29a6th<=3, 1, 0)

x$MarriageYear1 = 1900+x$q29cy1st
x$MarriageYear2 = 1900+x$q29cy2nd
x$MarriageYear3 = 1900+x$q29cy3rd
x$MarriageYear4 = 1900+x$q29cy4th
x$MarriageYear5 = 1900+x$q29cy5th
# x$MarriageYear6 = x$q29cy6th

x$YearFirstMarried = 
  pmin(x$MarriageYear1,
       x$MarriageYear2,
       x$MarriageYear3,
       x$MarriageYear4,
       x$MarriageYear5, na.rm=T)

# Average month of marriage is approximately June
x$MarriageMonth1 = ifelse(x$q29cm1st<99,x$q29cm1st,6)
x$MarriageMonth2 = ifelse(x$q29cm2nd<99,x$q29cm2nd,6)
x$MarriageMonth3 = ifelse(x$q29cm3rd<99,x$q29cm3rd,6)
x$MarriageMonth4 = ifelse(x$q29cm4th<99,x$q29cm4th,6)
x$MarriageMonth5 = ifelse(x$q29cm5th<99,x$q29cm5th,6)

x$MonthFirstMarried = 
  with(x,
       ifelse(!is.na(MarriageYear1), MarriageMonth1,
              ifelse(!is.na(MarriageYear2), MarriageMonth2,
                     ifelse(!is.na(MarriageYear3), MarriageMonth3,
                            ifelse(!is.na(MarriageYear4), MarriageMonth4,
                                   ifelse(!is.na(MarriageYear5), MarriageMonth5, NA
                                   )
                            )
                     )
              )
       )
  )

x$DateFirstMarried = ymd(paste(x$YearFirstMarried, x$MonthFirstMarried, "01", sep="-"))


# Fertility 

x$ChbornTot = x$q47b
x$ChbornTot[is.na(x$ChbornTot)] = 0

# First Birth
# Should get rid of this and just use the format in the next section
x$FirstBirthYear = 
with(x, 
     ifelse(ChbornTot==0, NA, 
            ifelse(q53d1st==1, q53by1st,
                   ifelse(q53d2nd==1, q53by2nd,
                          ifelse(q53d3rd==1, q53by3rd,
                                 ifelse(q53d4th==1, q53by4th,
                                        ifelse(q53d5th==1, q53by5th,
                                               ifelse(q53d6th==1, q53by6th,
                                                      ifelse(q53d7th==1, q53by7th,
                                                             ifelse(q53d8th==1, q53by8th,
                                                                    ifelse(q53d9th==1, q53by9th,
                                                                           ifelse(q53d10h==1, q53by10h,
                                                                                  ifelse(q53d11h==1, q53by11h,
                                                                                         ifelse(q53d12h==1, q53by12h,
                                                                                                ifelse(q53d13h==1, q53by13h,
                                                                                                       ifelse(q53d14h==1, q53by14h, q53by15h)
                                                                                                )
                                                                                         )
                                                                                  )
                                                                           )
                                                                    )
                                                             )
                                                      )
                                               )
                                        )
                                 )
                          )
                   )
            )
     )
)
x$FirstBirthMonth = 
  with(x, 
       ifelse(ChbornTot==0, NA, 
              ifelse(q53d1st==1, q53bm1st,
                     ifelse(q53d2nd==1, q53bm2nd,
                            ifelse(q53d3rd==1, q53bm3rd,
                                   ifelse(q53d4th==1, q53bm4th,
                                          ifelse(q53d5th==1, q53bm5th,
                                                 ifelse(q53d6th==1, q53bm6th,
                                                        ifelse(q53d7th==1, q53bm7th,
                                                               ifelse(q53d8th==1, q53bm8th,
                                                                      ifelse(q53d9th==1, q53bm9th,
                                                                             ifelse(q53d10h==1, q53bm10h,
                                                                                    ifelse(q53d11h==1, q53bm11h,
                                                                                           ifelse(q53d12h==1, q53bm12h,
                                                                                                  ifelse(q53d13h==1, q53bm13h,
                                                                                                         ifelse(q53d14h==1, q53bm14h, q53bm15h)
                                                                                                  )
                                                                                           )
                                                                                    )
                                                                             )
                                                                      )
                                                               )
                                                        )
                                                 )
                                          )
                                   )
                            )
                     )
              )
       )
  )
x$FirstBirthDay = 
  with(x, 
       ifelse(ChbornTot==0, NA, 
              ifelse(q53d1st==1, q53bd1st,
                     ifelse(q53d2nd==1, q53bd2nd,
                            ifelse(q53d3rd==1, q53bd3rd,
                                   ifelse(q53d4th==1, q53bd4th,
                                          ifelse(q53d5th==1, q53bd5th,
                                                 ifelse(q53d6th==1, q53bd6th,
                                                        ifelse(q53d7th==1, q53bd7th,
                                                               ifelse(q53d8th==1, q53bd8th,
                                                                      ifelse(q53d9th==1, q53bd9th,
                                                                             ifelse(q53d10h==1, q53bd10h,
                                                                                    ifelse(q53d11h==1, q53bd11h,
                                                                                           ifelse(q53d12h==1, q53bd12h,
                                                                                                  ifelse(q53d13h==1, q53bd13h,
                                                                                                         ifelse(q53d14h==1, q53bd14h, q53bd15h)
                                                                                                  )
                                                                                           )
                                                                                    )
                                                                             )
                                                                      )
                                                               )
                                                        )
                                                 )
                                          )
                                   )
                            )
                     )
              )
       )
  )
x$FirstBirthDay[x$FirstBirthDay==99 & !is.na(x$FirstBirthDay)] = round(median(x$FirstBirthDay[x$FirstBirthDay!=99], na.rm=T))

x$AgeAtFirstBirthMonths = with(x, (1900+FirstBirthYear-BornYear)*12 + FirstBirthMonth-BornMonth)


# All births

# Types of child (own birth is 1)
ChildTypes = 
  x %>%
  select(id, starts_with("q53d")) %>%
  gather(key=ChildNum, value=ChildType, -id) %>%
  mutate(ChildNum = gsub("q53d","",ChildNum)) %>%
  mutate(ChildNum = gsub("[[:alpha:]]","",ChildNum)) %>%
  mutate(ChildNum = as.numeric(ChildNum)) %>%
  arrange(id, ChildNum)

# Years
ChildBirthYears = 
  x %>%
  select(id, starts_with("q53by")) %>%
  gather(key=ChildNum, value=ChildBornYear, -id) %>%
  mutate(ChildNum = gsub("q53by","",ChildNum)) %>%
  mutate(ChildNum = gsub("[[:alpha:]]","",ChildNum)) %>%
  mutate(ChildNum = as.numeric(ChildNum)) %>%
  arrange(id, ChildNum)

# Months
ChildBirthMonths = 
  x %>%
  select(id, starts_with("q53bm")) %>%
  gather(key=ChildNum, value=ChildBornMonth, -id) %>%
  mutate(ChildNum = gsub("q53bm","",ChildNum)) %>%
  mutate(ChildNum = gsub("[[:alpha:]]","",ChildNum)) %>%
  mutate(ChildNum = as.numeric(ChildNum)) %>%
  arrange(id, ChildNum)

ChildBirthsPanel = left_join(ChildTypes, ChildBirthYears)
ChildBirthsPanel = left_join(ChildBirthsPanel, ChildBirthMonths)
# Need to reassign the ChildNum to not have gaps
NumString = c("01","02","03","04","05","06","07","08","09","10","11","12","13","14","15")
ChildBirthsPanel = 
  ChildBirthsPanel  %>% 
  filter(ChildType==1) %>%
  arrange(id, zoo::as.yearmon(paste(ChildBornYear+1900,ChildBornMonth,sep="-")), ChildNum) %>%
  group_by(id) %>%
  mutate(ChildNum = row_number()) %>%
  mutate(Date = ymd(paste(ChildBornYear+1900,ChildBornMonth,"01",sep="-"))) %>%
  mutate(ChildNumStringM = paste("Birth",NumString[row_number()],"Month",sep="")) %>%
  mutate(ChildNumStringY = paste("Birth",NumString[row_number()],"Year",sep="")) %>%
  mutate(ChildNumStringD = paste("Birth",NumString[row_number()],"Date",sep=""))
ChildBirthsPanel$Birth = 1

PregnancyDates = 
  ChildBirthsPanel %>% 
  rename(PregnancyNum = ChildNum) %>%
  mutate(Date = Date-months(9),
         PregnancyStart = 1) %>%
  select(id, Date, PregnancyStart)


# Reshape to get back individual variables for each birth
ChildBirthYearsWide = 
  ChildBirthsPanel %>%
  select(id, ChildNumStringY, ChildBornYear) %>%
  spread(key=ChildNumStringY, value=ChildBornYear)

# Reshape to get back individual variables for each birth
ChildBirthMonthsWide = 
  ChildBirthsPanel %>%
  select(id, ChildNumStringM, ChildBornMonth) %>%
  spread(key=ChildNumStringM, value=ChildBornMonth)

# Reshape to get back individual variables for each birth
ChildBirthDatesWide = 
  ChildBirthsPanel %>%
  select(id, ChildNumStringD, Date) %>%
  spread(key=ChildNumStringD, value=Date)

x = left_join(x, ChildBirthMonthsWide, by="id")
x = left_join(x, ChildBirthYearsWide, by="id")
x = left_join(x, ChildBirthDatesWide, by="id")

# Clean up duplicate dates (twins, etc.)
PregnancyDates = 
  PregnancyDates %>% 
  group_by(id, Date) %>%
  filter(row_number()==1)
ChildBirthsPanel =
  ChildBirthsPanel %>% 
  group_by(id, Date) %>%
  filter(row_number()==1) %>%
  select(id, Date, Birth)


# Abortions
x$EverPregnantNoBirth = ifelse(x$q67a==1, 1, 0)
x$EverAbortion = ifelse(x$q67cz==8 | is.na(x$q67cz), 0, 1)

# # q67i1st: when
# # q67t1st: what kind (termination is 3)
# # q67n1st: How many
# # Goes up to 4
# # if q67t1st == 3
# table(x$q67t1st,x$q67n1st)

# x$Abortion1Date = NA
# x$Abortion1Date[x$q67t1st==3] 


# There are a maximum of 4 periods of fetal loss
# q67i1st gives the date of the first period of losses
# q67t1st gives the type of the first period of losses (3 for "termination")
# q67n1st gives the number of losses in that period
LossTypes = 
  x %>%
  select(id, starts_with("q67t")) %>%
  gather(key=LossNum, value=LossType, -id) %>%
  mutate(LossNum = gsub("q67t","",LossNum)) %>%
  mutate(LossNum = gsub("[[:alpha:]]","",LossNum)) %>%
  mutate(LossNum = as.numeric(LossNum)) %>%
  arrange(id, LossNum)

LossTimes = 
  x %>%
  select(id, starts_with("q67i")) %>%
  gather(key=LossNum, value=LossTime, -id) %>%
  mutate(LossNum = gsub("q67i","",LossNum)) %>%
  mutate(LossNum = gsub("[[:alpha:]]","",LossNum)) %>%
  mutate(LossNum = as.numeric(LossNum)) %>%
  arrange(id, LossNum)

LossTimes = left_join(LossTypes, LossTimes) %>% filter(LossType==3)
# Need to reassign the LossNum to not have gaps
NumString = c("01","02","03","04","05","06","07","08","09","10","11","12","13","14","15")
LossTimes = 
  LossTimes %>%
  arrange(id, LossNum) %>%
  group_by(id) %>%
  mutate(LossNum = row_number()) %>%
  mutate(LossNumString = paste("Abortion",NumString[row_number()],"Time",sep=""))

LossTimesWide = 
  LossTimes %>%
  select(id, LossNumString, LossTime) %>%
  spread(key=LossNumString, value=LossTime)

# Counts of abortions
LossCounts = 
  x %>%
  select(id, starts_with("q67n")) %>%
  gather(key=LossNum, value=LossCount, -id) %>%
  mutate(LossNum = gsub("q67n","",LossNum)) %>%
  mutate(LossNum = gsub("[[:alpha:]]","",LossNum)) %>%
  mutate(LossNum = as.numeric(LossNum)) %>%
  arrange(id, LossNum)

LossCounts = left_join(LossTypes, LossCounts) %>% filter(LossType==3)
# Need to reassign the LossNum to not have gaps
LossCounts = 
  LossCounts %>%
  arrange(id, LossNum) %>%
  group_by(id) %>%
  mutate(LossNum = row_number()) %>%
  mutate(LossNumString = paste("Abortion",NumString[row_number()],"Count",sep=""))

LossCountsWide = 
  LossCounts %>%
  select(id, LossNumString, LossCount) %>%
  spread(key=LossNumString, value=LossCount)

# AbortionPanel = left_join(
#   LossTimes %>% select(-ChildNumString, -LossType), 
#   LossCounts %>% select(-ChildNumString, -LossType))
# ChildBirthsPanel$Date = ymd(with(ChildBirthsPanel, paste(ChildBornYear+1900,ChildBornMonth,"01",sep="-")))
# ChildBirthsPanel$Birth = 1

x = left_join(x, LossTimesWide, by="id")
x = left_join(x, LossCountsWide, by="id")

# IDs for NA in x$Abortion01Time==2: 968 2429

# Problem: not everyone who reported abortion "before first marriage" got married
# Does "before first marriage" mean 
# {Between last birth before marriage and marriage if married ever & After last birth if never married}
# {Between last birth before marriage and marriage if married ever & Any time if never married} (the prejudices of researchers might have produced this kind of classification)
# {Between own puberty and marriage if married ever & After own puberty if never married}

# x  %>%
#   filter(Abortion01Time==98)%>% 
#   select(id, DateFirstMarried, Abortion02Time, Birth01Date, Birth02Date, Birth03Date) %>%
#   print(n = Inf)
# # If never married and Abortion01Time==98, also never had live births
# # Alternative: if never married and also never had live births, Abortion01Time==98
# x  %>%
#   filter(Abortion01Time==98 & is.na(DateFirstMarried))%>% 
#   select(id, Abortion02Time, Birth01Date, Birth02Date, Birth03Date) %>%
#   print(n = Inf)
# # If never married and Abortion01Time!=98, had at least one birth
# # Alternative: if never married and had at least one birth, Abortion01Time!=98
# x  %>%
#   filter(!is.na(Abortion01Time) & Abortion01Time!=98 & is.na(DateFirstMarried))%>% 
#   select(id, Abortion01Time, Abortion02Time, Birth01Date, Birth02Date, Birth03Date) %>%
#   print(n = Inf)

# If married and never had live births, Abortion01Time==98 means before marriage and 1 means after marriage
x  %>%
  filter(!is.na(Abortion01Time) & !is.na(DateFirstMarried) & is.na(Birth01Date))%>% 
  select(id, DateFirstMarried, Abortion01Time) %>%
  print(n = Inf)
# If married and had live births, Abortion01Time coded in relation to births and marriage
x  %>%
  filter(!is.na(Abortion01Time) & !is.na(DateFirstMarried) & !is.na(Birth01Date))%>% 
  select(id, Abortion01Time, DateFirstMarried, Birth01Date, Birth02Date, Birth03Date) %>%
  print(n = Inf)

# If never married and never had live births, Abortion01Time==98
x  %>%
  filter(!is.na(Abortion01Time) & is.na(DateFirstMarried) & is.na(Birth01Date))%>% 
  select(id, Abortion01Time) %>%
  print(n = Inf)
# If never married and had live births, Abortion01Time coded in relation to births
# Presumably, Abortion01Time==98 for women whose abortion occured after all births, but this is not seen in any case
x  %>%
  filter(!is.na(Abortion01Time) & is.na(DateFirstMarried) & !is.na(Birth01Date))%>% 
  select(id, Abortion01Time, Birth01Date, Birth02Date, Birth03Date) %>%
  print(n = Inf)
x  %>%
  filter(!is.na(Abortion02Time))%>% 
  select(id, Abortion01Time, Abortion02Time, Birth01Date, Birth02Date, Birth03Date) %>%
  print(n = Inf)

# # If married and first live birth was before first marriage, 
# # unclear whether Abortion01Time==98 means after previous births (but only one case)
# x  %>%
#   filter(!is.na(Abortion01Time) & !is.na(DateFirstMarried) & !is.na(Birth01Date) & Birth01Date<DateFirstMarried)%>% 
#   select(id, Abortion01Time, DateFirstMarried, Birth01Date, Birth02Date, Birth03Date, Birth04Date, Birth05Date) %>%
#   print(n = Inf)



# # Age at last birth
# hist(
#   with(x,
#        pmax(Birth01Year, Birth02Year, Birth03Year, Birth04Year, Birth05Year, 
#             Birth06Year, Birth07Year, Birth08Year, Birth09Year, Birth10Year, na.rm=T)-
#          BornYear)+1900)


# Under construction

# Need to correct these for time while pregnant
# and for old age
# Fewer than 1% of abortions happen after age 45
# Make age 45 an upper bound
x$Ab1DateMax = as.Date(NA)
# never married and never had live births
x$Ab1DateMax[is.na(x$Birth01Date) & is.na(x$YearFirstMarried) & !is.na(x$Abortion01Time)] = 
  pmin(x$SurveyDate[is.na(x$Birth01Date) & is.na(x$YearFirstMarried) & !is.na(x$Abortion01Time)],
       x$BornDate[is.na(x$Birth01Date) & is.na(x$YearFirstMarried) & !is.na(x$Abortion01Time)]+years(45))
# married and never had live births
x$Ab1DateMax[is.na(x$Birth01Date) & !is.na(x$YearFirstMarried) & !is.na(x$Abortion01Time)] = 
  zoo::as.Date(with(x %>% filter(is.na(Birth01Date) & !is.na(YearFirstMarried) & !is.na(Abortion01Time)),
               ifelse(Abortion01Time==98, DateFirstMarried, SurveyDate)), origin="1970-01-01")
# had live births
# Max is date of subsequent pregnancy minus a median pregancy duration of 280 days (9 months)
# Jukic, A. M., Baird, D. D., Weinberg, C. R., McConnaughey, D. R., & Wilcox, A. J. (2013). 
# Length of human pregnancy and contributors to its natural variation. 
# Human Reproduction (Oxford, England), 28(10), 2848-2855. http://doi.org/10.1093/humrep/det297
# minus one month to allow for post-abortion fertility
# Should instead use months
x$Ab1DateMax[x$Abortion01Time==1 & !is.na(x$Abortion01Time)] = 
  pmin(x$Birth01Date[x$Abortion01Time==1 & !is.na(x$Abortion01Time)]-months(10),
       x$SurveyDate[x$Abortion01Time==1 & !is.na(x$Abortion01Time)],
       x$BornDate[x$Abortion01Time==1 & !is.na(x$Abortion01Time)]+years(45), na.rm=T)
x$Ab1DateMax[x$Abortion01Time==2 & !is.na(x$Abortion01Time)] = 
  pmin(x$Birth02Date[x$Abortion01Time==2 & !is.na(x$Abortion01Time)]-months(10),
       x$SurveyDate[x$Abortion01Time==2 & !is.na(x$Abortion01Time)],
       x$BornDate[x$Abortion01Time==2 & !is.na(x$Abortion01Time)]+years(45), na.rm=T)
x$Ab1DateMax[x$Abortion01Time==3 & !is.na(x$Abortion01Time)] = 
  pmin(x$Birth03Date[x$Abortion01Time==3 & !is.na(x$Abortion01Time)]-months(10),
       x$SurveyDate[x$Abortion01Time==3 & !is.na(x$Abortion01Time)],
       x$BornDate[x$Abortion01Time==3 & !is.na(x$Abortion01Time)]+years(45), na.rm=T)
x$Ab1DateMax[x$Abortion01Time==4 & !is.na(x$Abortion01Time)] = 
  pmin(x$Birth04Date[x$Abortion01Time==4 & !is.na(x$Abortion01Time)]-months(10),
       x$SurveyDate[x$Abortion01Time==4 & !is.na(x$Abortion01Time)],
       x$BornDate[x$Abortion01Time==4 & !is.na(x$Abortion01Time)]+years(45), na.rm=T)
x$Ab1DateMax[x$Abortion01Time==5 & !is.na(x$Abortion01Time)] = 
  pmin(x$Birth05Date[x$Abortion01Time==5 & !is.na(x$Abortion01Time)]-months(10),
       x$SurveyDate[x$Abortion01Time==5 & !is.na(x$Abortion01Time)],
       x$BornDate[x$Abortion01Time==5 & !is.na(x$Abortion01Time)]+years(45), na.rm=T)
x$Ab1DateMax[x$Abortion01Time==6 & !is.na(x$Abortion01Time)] = 
  pmin(x$Birth06Date[x$Abortion01Time==6 & !is.na(x$Abortion01Time)]-months(10),
       x$SurveyDate[x$Abortion01Time==6 & !is.na(x$Abortion01Time)],
       x$BornDate[x$Abortion01Time==6 & !is.na(x$Abortion01Time)]+years(45), na.rm=T)
x$Ab1DateMax[x$Abortion01Time==7 & !is.na(x$Abortion01Time)] = 
  pmin(x$Birth07Date[x$Abortion01Time==7 & !is.na(x$Abortion01Time)]-months(10),
       x$SurveyDate[x$Abortion01Time==7 & !is.na(x$Abortion01Time)],
       x$BornDate[x$Abortion01Time==7 & !is.na(x$Abortion01Time)]+years(45), na.rm=T)
x$Ab1DateMax[x$Abortion01Time==8 & !is.na(x$Abortion01Time)] = 
  pmin(x$Birth08Date[x$Abortion01Time==8 & !is.na(x$Abortion01Time)]-months(10),
       x$SurveyDate[x$Abortion01Time==8 & !is.na(x$Abortion01Time)],
       x$BornDate[x$Abortion01Time==8 & !is.na(x$Abortion01Time)]+years(45), na.rm=T)
x$Ab1DateMax[x$Abortion01Time==9 & !is.na(x$Abortion01Time)] = 
  pmin(x$Birth09Date[x$Abortion01Time==9 & !is.na(x$Abortion01Time)]-months(10),
       x$SurveyDate[x$Abortion01Time==9 & !is.na(x$Abortion01Time)],
       x$BornDate[x$Abortion01Time==9 & !is.na(x$Abortion01Time)]+years(45), na.rm=T)
x$Ab1DateMax[x$Abortion01Time==10 & !is.na(x$Abortion01Time)] = 
  pmin(x$Birth10Date[x$Abortion01Time==10 & !is.na(x$Abortion01Time)]-months(10),
       x$SurveyDate[x$Abortion01Time==10 & !is.na(x$Abortion01Time)],
       x$BornDate[x$Abortion01Time==10 & !is.na(x$Abortion01Time)]+years(45), na.rm=T)
x$Ab1DateMax[x$Abortion01Time==98 & !is.na(x$Abortion01Time)] = 
  pmin(x$DateFirstMarried[x$Abortion01Time==98 & !is.na(x$Abortion01Time)],
       x$SurveyDate[x$Abortion01Time==98 & !is.na(x$Abortion01Time)],
       x$BornDate[x$Abortion01Time==98 & !is.na(x$Abortion01Time)]+years(45), na.rm=T)

hist(as.numeric(x$Ab1DateMax-x$BornDate)/365,breaks=17:46)


# Minimum abortion timing
# Need to account for breastfeeding?

# Minimum date for abortion (13 in this case)
x$MinPregnancyDate = ymd(paste(x$BornYear+13, x$BornMonth, 01, sep="-"))
x$Ab1DateMin = as.Date(NA)
# never married and never had live births
x$Ab1DateMin[is.na(x$Birth01Date) & is.na(x$DateFirstMarried) & !is.na(x$Abortion01Time)] = 
  x$MinPregnancyDate[is.na(x$Birth01Date) & is.na(x$DateFirstMarried) & !is.na(x$Abortion01Time)]
# married and never had live births
x$Ab1DateMin[is.na(x$Birth01Date) & !is.na(x$DateFirstMarried) & !is.na(x$Abortion01Time)] = 
  zoo::as.Date(with(x %>% filter(is.na(Birth01Date) & !is.na(DateFirstMarried) & !is.na(Abortion01Time)),
               ifelse(Abortion01Time==98, MinPregnancyDate, DateFirstMarried)), origin="1970-01-01")
# never married and had live births
# Allow two months after most recent birth for recognition of new pregnancy
x$Ab1DateMin[x$Abortion01Time==1 & !is.na(x$Abortion01Time) & is.na(x$DateFirstMarried)] = 
  x$MinPregnancyDate[x$Abortion01Time==1 & !is.na(x$Abortion01Time) & is.na(x$DateFirstMarried)]
x$Ab1DateMin[x$Abortion01Time==2 & !is.na(x$Abortion01Time) & is.na(x$DateFirstMarried)] = 
  # pmax(
  #   x$Birth01Date[x$Abortion01Time==2 & !is.na(x$Abortion01Time) & is.na(x$DateFirstMarried)]+months(2)
  #   x$Birth01Date[x$Abortion01Time==2 & !is.na(x$Abortion01Time) & is.na(x$DateFirstMarried)]+months(2)
  # )
  x$Birth01Date[x$Abortion01Time==2 & !is.na(x$Abortion01Time) & is.na(x$DateFirstMarried)]+months(2)
x$Ab1DateMin[x$Abortion01Time==3 & !is.na(x$Abortion01Time) & is.na(x$DateFirstMarried)] = 
  x$Birth02Date[x$Abortion01Time==3 & !is.na(x$Abortion01Time) & is.na(x$DateFirstMarried)]+months(2)
x$Ab1DateMin[x$Abortion01Time==4 & !is.na(x$Abortion01Time) & is.na(x$DateFirstMarried)] = 
  x$Birth03Date[x$Abortion01Time==4 & !is.na(x$Abortion01Time) & is.na(x$DateFirstMarried)]+months(2)
x$Ab1DateMin[x$Abortion01Time==5 & !is.na(x$Abortion01Time) & is.na(x$DateFirstMarried)] = 
  x$Birth04Date[x$Abortion01Time==5 & !is.na(x$Abortion01Time) & is.na(x$DateFirstMarried)]+months(2)
x$Ab1DateMin[x$Abortion01Time==6 & !is.na(x$Abortion01Time) & is.na(x$DateFirstMarried)] = 
  x$Birth05Date[x$Abortion01Time==6 & !is.na(x$Abortion01Time) & is.na(x$DateFirstMarried)]+months(2)
x$Ab1DateMin[x$Abortion01Time==7 & !is.na(x$Abortion01Time) & is.na(x$DateFirstMarried)] = 
  x$Birth06Date[x$Abortion01Time==7 & !is.na(x$Abortion01Time) & is.na(x$DateFirstMarried)]+months(2)
x$Ab1DateMin[x$Abortion01Time==8 & !is.na(x$Abortion01Time) & is.na(x$DateFirstMarried)] = 
  x$Birth07Date[x$Abortion01Time==8 & !is.na(x$Abortion01Time) & is.na(x$DateFirstMarried)]+months(2)
x$Ab1DateMin[x$Abortion01Time==9 & !is.na(x$Abortion01Time) & is.na(x$DateFirstMarried)] = 
  x$Birth08Date[x$Abortion01Time==9 & !is.na(x$Abortion01Time) & is.na(x$DateFirstMarried)]+months(2)
x$Ab1DateMin[x$Abortion01Time==10 & !is.na(x$Abortion01Time) & is.na(x$DateFirstMarried)] = 
  x$Birth09Date[x$Abortion01Time==10 & !is.na(x$Abortion01Time) & is.na(x$DateFirstMarried)]+months(2)
x$Ab1DateMin[x$Abortion01Time==98 & !is.na(x$Abortion01Time) & is.na(x$DateFirstMarried)] = 
  with(x %>% filter(Abortion01Time==98 & !is.na(Abortion01Time) & is.na(DateFirstMarried)),
       pmax(MinPregnancyDate, 
            pmax(Birth01Date, Birth02Date, Birth03Date, Birth04Date, Birth05Date, 
            Birth06Date, Birth07Date, Birth08Date, Birth09Date, Birth10Date, na.rm=T)+months(2), 
            na.rm=T)
  )
# married and had live births
# For Abortion01Time==98, need to select the largest birth date that is smaller than the marriage date
x$Ab1DateMin[x$Abortion01Time==98 & !is.na(x$Abortion01Time) & !is.na(x$DateFirstMarried) & !is.na(x$Birth01Date)] = 
  zoo::as.Date(
    with(x %>% filter(Abortion01Time==98 & !is.na(Abortion01Time) & !is.na(DateFirstMarried) & !is.na(Birth01Date)),
         ifelse(MinPregnancyDate<DateFirstMarried & (DateFirstMarried<Birth01Date | is.na(Birth01Date)), MinPregnancyDate,
                ifelse(Birth01Date<DateFirstMarried & (DateFirstMarried<Birth02Date | is.na(Birth02Date)), Birth01Date+months(2),
                       ifelse(Birth02Date<DateFirstMarried & (DateFirstMarried<Birth03Date | is.na(Birth02Date)), Birth02Date+months(2),
                              ifelse(Birth03Date<DateFirstMarried & (DateFirstMarried<Birth04Date | is.na(Birth02Date)), Birth03Date+months(2),
                                     ifelse(Birth04Date<DateFirstMarried & (DateFirstMarried<Birth05Date | is.na(Birth02Date)), Birth04Date+months(2),
                                            ifelse(Birth05Date<DateFirstMarried & (DateFirstMarried<Birth06Date | is.na(Birth02Date)), Birth05Date+months(2),
                                                   ifelse(Birth06Date<DateFirstMarried & (DateFirstMarried<Birth07Date | is.na(Birth02Date)), Birth06Date+months(2),
                                                          ifelse(Birth07Date<DateFirstMarried & (DateFirstMarried<Birth08Date | is.na(Birth02Date)), Birth07Date+months(2),
                                                                 ifelse(Birth08Date<DateFirstMarried & (DateFirstMarried<Birth09Date | is.na(Birth02Date)), Birth08Date+months(2),
                                                                        ifelse(Birth09Date<DateFirstMarried & (DateFirstMarried<Birth10Date | is.na(Birth02Date)), Birth09Date+months(2), Birth10Date+months(2))
                                                                 )
                                                          )
                                                   )
                                            )
                                     )
                              )
                       )
                )
         )
    )
  )
x$Ab1DateMin[x$Abortion01Time==1 & !is.na(x$Abortion01Time) & !is.na(x$DateFirstMarried) & !is.na(x$Birth01Date)] = 
  zoo::as.Date(
    with(x %>% filter(Abortion01Time==1 & !is.na(Abortion01Time) & !is.na(DateFirstMarried) & !is.na(Birth01Date)),
         ifelse(MinPregnancyDate<DateFirstMarried & (DateFirstMarried<Birth01Date | is.na(Birth01Date)), 
                DateFirstMarried,
                MinPregnancyDate)
         )
  )
# Abortion01Time==2 implies !is.na(x$Birth01Date)
x$Ab1DateMin[x$Abortion01Time==2 & !is.na(x$Abortion01Time) & !is.na(x$DateFirstMarried)] = 
  zoo::as.Date(
    with(x %>% filter(Abortion01Time==2 & !is.na(Abortion01Time) & !is.na(DateFirstMarried)),
         ifelse(Birth01Date<DateFirstMarried & (DateFirstMarried<Birth02Date | is.na(Birth02Date)), 
                DateFirstMarried,
                Birth01Date+months(2))
    )
  )
x$Ab1DateMin[x$Abortion01Time==3 & !is.na(x$Abortion01Time) & !is.na(x$DateFirstMarried)] = 
  zoo::as.Date(
    with(x %>% filter(Abortion01Time==3 & !is.na(Abortion01Time) & !is.na(DateFirstMarried)),
         ifelse(Birth02Date<DateFirstMarried & (DateFirstMarried<Birth03Date | is.na(Birth03Date)), 
                DateFirstMarried,
                Birth02Date+months(2))
    )
  )
x$Ab1DateMin[x$Abortion01Time==4 & !is.na(x$Abortion01Time) & !is.na(x$DateFirstMarried)] = 
  zoo::as.Date(
    with(x %>% filter(Abortion01Time==4 & !is.na(Abortion01Time) & !is.na(DateFirstMarried)),
         ifelse(Birth03Date<DateFirstMarried & (DateFirstMarried<Birth04Date | is.na(Birth04Date)), 
                DateFirstMarried,
                Birth03Date+months(2))
    )
  )
x$Ab1DateMin[x$Abortion01Time==5 & !is.na(x$Abortion01Time) & !is.na(x$DateFirstMarried)] = 
  zoo::as.Date(
    with(x %>% filter(Abortion01Time==5 & !is.na(Abortion01Time) & !is.na(DateFirstMarried)),
         ifelse(Birth04Date<DateFirstMarried & (DateFirstMarried<Birth05Date | is.na(Birth05Date)), 
                DateFirstMarried,
                Birth04Date+months(2))
    )
  )
x$Ab1DateMin[x$Abortion01Time==6 & !is.na(x$Abortion01Time) & !is.na(x$DateFirstMarried)] = 
  zoo::as.Date(
    with(x %>% filter(Abortion01Time==6 & !is.na(Abortion01Time) & !is.na(DateFirstMarried)),
         ifelse(Birth05Date<DateFirstMarried & (DateFirstMarried<Birth06Date | is.na(Birth06Date)), 
                DateFirstMarried,
                Birth05Date+months(2))
    )
  )
x$Ab1DateMin[x$Abortion01Time==7 & !is.na(x$Abortion01Time) & !is.na(x$DateFirstMarried)] = 
  zoo::as.Date(
    with(x %>% filter(Abortion01Time==7 & !is.na(Abortion01Time) & !is.na(DateFirstMarried)),
         ifelse(Birth06Date<DateFirstMarried & (DateFirstMarried<Birth07Date | is.na(Birth07Date)), 
                DateFirstMarried,
                Birth06Date+months(2))
    )
  )
x$Ab1DateMin[x$Abortion01Time==8 & !is.na(x$Abortion01Time) & !is.na(x$DateFirstMarried)] = 
  zoo::as.Date(
    with(x %>% filter(Abortion01Time==8 & !is.na(Abortion01Time) & !is.na(DateFirstMarried)),
         ifelse(Birth07Date<DateFirstMarried & (DateFirstMarried<Birth08Date | is.na(Birth08Date)), 
                DateFirstMarried,
                Birth07Date+months(2))
    )
  )
x$Ab1DateMin[x$Abortion01Time==9 & !is.na(x$Abortion01Time) & !is.na(x$DateFirstMarried)] = 
  zoo::as.Date(
    with(x %>% filter(Abortion01Time==9 & !is.na(Abortion01Time) & !is.na(DateFirstMarried)),
         ifelse(Birth08Date<DateFirstMarried & (DateFirstMarried<Birth09Date | is.na(Birth09Date)), 
                DateFirstMarried,
                Birth08Date+months(2))
    )
  )
x$Ab1DateMin[x$Abortion01Time==10 & !is.na(x$Abortion01Time) & !is.na(x$DateFirstMarried)] = 
  zoo::as.Date(
    with(x %>% filter(Abortion01Time==10 & !is.na(Abortion01Time) & !is.na(DateFirstMarried)),
         ifelse(Birth09Date<DateFirstMarried & (DateFirstMarried<Birth10Date | is.na(Birth10Date)), 
                DateFirstMarried,
                Birth09Date+months(2))
    )
  )

hist(as.numeric(x$Ab1DateMax-x$Ab1DateMin)/365)
hist(as.numeric(x$Ab1DateMin-x$BornDate)/365,breaks=13:45)


# # Some minima are higher than the maxima
# # They seem to have had twins
# # Neither had adoptions or step-children
# print(
# x %>%
#   filter(Ab1DateMin>Ab1DateMax) %>%
#   select(id, 
#          # BornDate, DateFirstMarried,
#          Ab1DateMin, Ab1DateMax, Abortion01Time,  
#          # Birth01Date,
#          Birth02Date, Birth03Date,
#          Birth04Date,
#          # Birth05Date,
#          # q53by1st,q53bm1st,q53bd1st,
#          # q53by2nd,q53bm2nd,q53bd2nd,
#          q53by3rd,q53bm3rd,q53bd3rd,
#          q53by4th,q53bm4th,q53bd4th
#          ),
# n=Inf)
x$Ab1DateMax[x$id==1040] = x$Birth02Date[x$id==1040]-months(10)
x$Ab1DateMin[x$id==1040] = x$Birth01Date[x$id==1040]+months(2)
x$Ab1DateMax[x$id==1974] = x$Birth03Date[x$id==1974]-months(10)
x$Ab1DateMin[x$id==1974] = x$Birth02Date[x$id==1974]+months(2)

x$AbortionBefore21 = ifelse(x$Ab1DateMax-x$BornDate<years(21), 1, 0)

x$AbortionPropBefore21 = 
  pmin(pmax(as.numeric(x$BornDate+years(21)-x$Ab1DateMin)/as.numeric(x$Ab1DateMax-x$Ab1DateMin),0),1)



####### Combine panels

# # First check for repetitions
# fMovesPanel %>%
#   group_by(id, Date) %>%
#   filter(length(id)>1)
# ContraPanel %>%
#   group_by(id, Date) %>%
#   filter(length(id)>1)
# ChildBirthsPanel %>%
#   group_by(id, Date) %>%
#   filter(length(id)>1)
# PregnancyDates %>%
#   group_by(id, Date) %>%
#   filter(length(id)>1)

panel = left_join(fMovesPanel,ContraPanel, by=c("id","Date"))
panel = left_join(panel,ChildBirthsPanel, by=c("id","Date"))
panel = left_join(panel,PregnancyDates, by=c("id","Date"))

# # Check for repetitions again
# panel %>%
#   group_by(id, Date) %>%
#   filter(length(id)>1)

panel = 
  right_join(
    x %>% 
      select(id,BornMonth,BornYear, BornDate,
             AgeAtFirstPill, AgeAtFirstPillCensored, 
             AgeAtFirstCondom, AgeAtFirstCondomCensored, 
             AgeAtFirstIUD, AgeAtFirstIUDCensored, 
             YearFirstMarried,MonthFirstMarried,
             q21a, q22a,q22b, Birth01Date,
             FirstBirthDay, FirstBirthMonth, FirstBirthYear, 
             AgeAtFirstBirthMonths, EverAbortion, scwt,
             PillDateFirst, CondomDateFirst, IUDDateFirst), 
    panel, by="id")
# panel = 
#   right_join(
#     x %>% select(id,BornMonth,BornYear, SurveyMonth, SurveyYear, AgeAtFirstPill, AgeAtFirstPillCensored,YearFirstMarried,MonthFirstMarried,q21a, q22a,q22b), 
#     panel, by="id")
# panel = left_join(panel, xHomes, by=c("id","HomeNum"))
panel$Age = with(panel, zoo::as.yearmon(Date) - zoo::as.yearmon(paste(BornYear,BornMonth,sep="-")))
# panel$Age = zoo::as.yearmon(panel$Date) - zoo::as.yearmon(paste(panel$BornYear,panel$BornMonth,sep="-"))
panel$AgeMonths = as.integer(round(panel$Age*12))
# aom = read_csv("Australia/data/Australia_Laws_WithOverseas.csv")
panel = left_join(panel, aom, by = "StateCode")
panel = arrange(panel, id, Date)

# panel = left_join(panel,PillStart, by=c("id","Date"="ContraStart"))
# panel = left_join(panel,PillFinish, by=c("id","Date"="ContraFinish"))
panel$Pill[is.na(panel$Pill)] = 0
panel$Condom[is.na(panel$Condom)] = 0
panel$IUD[is.na(panel$IUD)] = 0
# panel$PillStart[is.na(panel$PillStart)] = 0
# panel$PillFinish[is.na(panel$PillFinish)] = 0
# panel$start[is.na(panel$start)] = 0
# panel$finish[is.na(panel$finish)] = 0
# panel$start  = ifelse(panel$Date == panel$PillStart & !is.na(panel$PillStart), 1, 0)
# panel$finish = ifelse(panel$Date == panel$PillFinish & !is.na(panel$PillStart), 1, 0)
# panel$start[is.na(panel$start)] = 0
# panel$finish[is.na(panel$finish)] = 0
panel$MarriedBefore = "No"
panel$MarriedBefore[!is.na(panel$YearFirstMarried)] = 
  with(panel[!is.na(panel$YearFirstMarried),], 
       ifelse(zoo::as.Date(paste(YearFirstMarried,MonthFirstMarried,"01",sep="-"))<=Date, "Yes", "No")
  )
# panel$MarriedBefore = factor(panel$MarriedBefore)

panel = filter(panel, AgeMonths>0)
panel$AgeMonthsStart = round(panel$AgeMonths)
panel$AgeMonthsStart = panel$AgeMonths-1
# panel$AgeStart = panel$Age-1/12
panel$year = year(panel$Date)
panel$month = month(panel$Date)
# panel$year = as.numeric(substr(panel$Date,1,4))
# panel$month = as.numeric(substr(panel$Date,6,7))

panel$AoMLawChangeDate = ymd(paste(panel$AoMLawChangeYear,panel$AoMLawChangeMonth,"01",sep="-"))

panel$YC = 
  with(panel,
       ifelse(StateCode=="Overseas",
              0,
              ifelse(
                18<=Age & Age<21 &
                  Date >= AoMLawChangeDate,
                1,0)
       )
  )

panel$YCbroad = panel$YC
# with(panel,
#      ifelse(StateCode=="Overseas",
#             0,
#             ifelse(
#               18<=Age & Age<21 &
#                 Date >= zoo::as.Date(paste(AoMLawChangeYear,MonthNumString[AoMLawChangeMonth],"01",sep="-"),format="%Y-%m-%d"),
#               1,0)
#      )
# )
panel$YCbroad[with(panel, StateCode=="NSW" & 14<=Age & Age<21 & Date >= AoMLawChangeDate)] = 1

# panel = plyr::ddply(panel, ~id, transform, UsedPillLastMonth = lag(Pill))
panel$After = ifelse(panel$Date >= panel$AoMLawChangeDate, 1, 0)
panel$Turned18ThisMonth = ifelse(panel$Age==18, 1, 0)

# Indicator for whether used Pill in the past or this month
# The +(1/100) is an allowance for the fact that Age is a float
panel$UsedPillBeforeOrNow = ifelse(panel$Age+(1/100) >= panel$AgeAtFirstPillCensored, 1, 0)
# Indicator for whether used Pill in the past
# The +(1/100) is an allowance for the fact that age is a float
panel$UsedPillBefore = ifelse(panel$Age > panel$AgeAtFirstPillCensored+(1/100), 1, 0)
panel$Catholic = ifelse(panel$q21a==1, 1, 0)

panel$FirstPill = ifelse(panel$Age==panel$AgeAtFirstPillCensored, 1, 0)


# unique_id <- function(x, ...) {
#   id_set <- x %>% select(...)
#   id_set_dist <- id_set %>% distinct
#   if (nrow(id_set) == nrow(id_set_dist)) {
#     TRUE
#   } else {
#     non_unique_ids <- id_set %>% 
#       filter(id_set %>% duplicated) %>% 
#       distinct()
#     suppressMessages(
#       inner_join(non_unique_ids, x) %>% arrange(...)
#     )
#   }
# }
# unique_id(panel[,c("id","Date")], id, Date)


# library(xts)
# panelxts = xts(x = panel, order.by = panel$Date)
# 
# pdata.frame(panel, "id", "Date", "panelp")

# # library(plm)
# panelplm = pdata.frame(panel, index = c("id","Date"), drop.index = FALSE, row.names = TRUE, stringsAsFactors = FALSE)
# panelplm$UsedPillLastMonth = plm::lag(panelplm$Pill)
# panelplm$Date= zoo::as.Date(panelplm$Date)
# panelplm$id = as.numeric(levels(panelplm$id))[panelplm$id]
# # panel$id = factor(panel$id)
# panel = left_join(panel, panelplm[,c("id","Date","UsedPillLastMonth")], by=c("id", "Date"))

panel = panel %>%
  group_by(id) %>%
  mutate(UsedPillLastMonth = dplyr::lag(Pill))

panel = panel %>%
  group_by(id) %>%
  mutate(
    YClag1 = dplyr::lag(YCbroad),
    YClag2 = dplyr::lag(YCbroad, 2),
    YClag3 = dplyr::lag(YCbroad, 3),
    YClag4 = dplyr::lag(YCbroad, 4),
    YClag5 = dplyr::lag(YCbroad, 5),
    YClag6 = dplyr::lag(YCbroad, 6),
    YClag7 = dplyr::lag(YCbroad, 7),
    YClag8 = dplyr::lag(YCbroad, 8)
  )


panel$After = ifelse(panel$Date >= panel$AoMLawChangeDate, 1, 0)
# Factors for interaction FEs
panel$StateBorn = paste(panel$StateCode,panel$BornYear, sep="_")
panel$StateYear = paste(panel$StateCode,panel$year, sep="_")
panel$AgeFactor = with(panel, factor(floor(Age), levels=10:50))

panel$Age18 = ifelse(18<=panel$Age & panel$Age<19, 1, 0)
panel$Age19 = ifelse(19<=panel$Age & panel$Age<20, 1, 0)
panel$Age20 = ifelse(20<=panel$Age & panel$Age<21, 1, 0)
panel$Age1013 = ifelse(10<=panel$Age & panel$Age<14, 1, 0)
panel$Age1417 = ifelse(14<=panel$Age & panel$Age<18, 1, 0)
panel$Age1820 = ifelse(18<=panel$Age & panel$Age<21, 1, 0)
panel$Age2130 = ifelse(21<=panel$Age & panel$Age<31, 1, 0)
panel$Age3140 = ifelse(31<=panel$Age & panel$Age<41, 1, 0)
panel$Age4150 = ifelse(41<=panel$Age & panel$Age<51, 1, 0)

panel$Age1015 = ifelse(10<=panel$Age & panel$Age<16, 1, 0)
panel$Age1617 = ifelse(16<=panel$Age & panel$Age<18, 1, 0)

# # Check for replications again
# panel %>%
#   group_by(id, Date) %>%
#   filter(length(id)>1)
# panel %>%
#   group_by(id, AgeMonths) %>%
#   filter(length(id)>1)
# panel %>%
#   group_by(id, Age) %>%
#   filter(length(id)>1)

AgeGroups = 
  panel %>%
  select(id, Date, Age1013, Age1417, Age1820, Age2130, Age3140, Age4150) %>%
  gather(key=AgeGroup, value=AgeGroupValue, Age1013:Age4150) %>%
  filter(AgeGroupValue==1) %>%
  mutate(AgeGroupValue=NULL) %>%
  mutate(AgeGroup = paste(substr(AgeGroup,4,5),substr(AgeGroup,6,7), sep="-"))
# mutate(AgeGroup = gsub("Age","",AgeGroup))

AgeGroups2 = 
  panel %>%
  select(id, Date, Age1015, Age1617, Age1820, Age2130, Age3140, Age4150) %>%
  gather(key=AgeGroup2, value=AgeGroupValue, c(Age1015, Age1617, Age1820, Age2130, Age3140, Age4150)) %>%
  filter(AgeGroupValue==1) %>%
  mutate(AgeGroupValue=NULL) %>%
  mutate(AgeGroup2 = paste(substr(AgeGroup2,4,5),substr(AgeGroup2,6,7), sep="-"))
# mutate(AgeGroup2 = gsub("Age","",AgeGroup2))

panel = left_join(panel, AgeGroups, by=c("id", "Date"))
panel = left_join(panel, AgeGroups2, by=c("id", "Date"))


# ContraType2 = 
#   select(x, id, Contra2_1, Contra2_2, Contra2_3, Contra2_4, Contra2_5, Contra2_6, Contra2_7, Contra2_8, Contra2_9) %>%
#   gather(ContraceptiveNum, ContraType2, Contra2_1:Contra2_9, na.rm=T)
# ContraType2$ContraceptiveNum = as.numeric(gsub("Contra2_","",ContraType2$ContraceptiveNum))

# Use 1961 because that is the earliest year in the hazard models
panel$ACTYear = with(panel, ifelse(StateCode=="ACT", year-1961, 0))
panel$NSWYear = with(panel, ifelse(StateCode=="NSW", year-1961, 0))
panel$NTYear  = with(panel, ifelse(StateCode=="NT",  year-1961, 0))
panel$QldYear = with(panel, ifelse(StateCode=="Qld", year-1961, 0))
panel$SAYear  = with(panel, ifelse(StateCode=="SA",  year-1961, 0))
panel$TasYear = with(panel, ifelse(StateCode=="Tas", year-1961, 0))
panel$VicYear = with(panel, ifelse(StateCode=="Vic", year-1961, 0))
panel$WAYear  = with(panel, ifelse(StateCode=="WA",  year-1961, 0))

# Use 1961 because that is the earliest year in the hazard models
panel$ACTYear2 = panel$ACTYear^2
panel$NSWYear2 = panel$NSWYear^2
panel$NTYear2  = panel$NTYear^2
panel$QldYear2 = panel$QldYear^2
panel$SAYear2  = panel$SAYear^2
panel$TasYear2 = panel$TasYear^2
panel$VicYear2 = panel$VicYear^2
panel$WAYear2  = panel$WAYear^2

panel = panel %>%
  group_by(id) %>%
  mutate(
    MaxAge = max(Age),
    Date18 = Date[Age==18],
    StateCode18 = StateCode[Age==18],
    YC18 = YC[Age==18],
    Pill18 = Pill[Age==18],
    UsedPillMonthBefore18 = UsedPillLastMonth[Age==18], 
    UsedPillBefore18 = UsedPillBefore[Age==18], 
    MarriedBefore18 = MarriedBefore[Age==18],
    Date19 = Date[Age==19],
    StateCode19 = StateCode[Age==19],
    YC19 = YC[Age==19],
    Pill19 = Pill[Age==19],
    UsedPillMonthBefore19 = UsedPillLastMonth[Age==19], 
    UsedPillBefore19 = UsedPillBefore[Age==19], 
    MarriedBefore19 = MarriedBefore[Age==19],
    Date20 = Date[Age==20],
    StateCode20 = StateCode[Age==20],
    YC20 = YC[Age==20],
    Pill20 = Pill[Age==20],
    UsedPillMonthBefore20 = UsedPillLastMonth[Age==20], 
    UsedPillBefore20 = UsedPillBefore[Age==20], 
    MarriedBefore20 = MarriedBefore[Age==20],
    Date21 = Date[Age==21],
    StateCode21 = StateCode[Age==21],
    YC21 = YC[Age==21],
    Pill21 = Pill[Age==21],
    UsedPillMonthBefore21 = UsedPillLastMonth[Age==21], 
    UsedPillBefore21 = UsedPillBefore[Age==21], 
    MarriedBefore21 = MarriedBefore[Age==21],
    Date22 = ifelse(MaxAge>=22, Date[Age==22], NA),
    StateCode22 = ifelse(MaxAge>=22, StateCode[Age==22], NA),
    YC22 = ifelse(MaxAge>=22, YC[Age==22], NA),
    Pill22 = ifelse(MaxAge>=22, Pill[Age==22], NA),
    UsedPillMonthBefore22 = ifelse(MaxAge>=22, UsedPillLastMonth[Age==22], NA),
    UsedPillBefore22 = ifelse(MaxAge>=22, UsedPillBefore[Age==22], NA),
    MarriedBefore22 = ifelse(MaxAge>=22, MarriedBefore[Age==22], NA),
    YCprop1820 = mean(YC[18<=Age & Age<21]),
    Pill1820Ever = max(Pill[18<=Age & Age<21], na.rm=T),
    StateMode1820 = Mode(StateCode[18<=Age & Age<21])) %>%
  ungroup %>%
  mutate(MonthsSinceAoM = interval(AoMLawChangeDate, Date)%/%months(1))


# Often need information about conditions at or between ages 18 and 21
# I left this in for legacy code, but it is a silly way to do this
Age18 = 
  panel %>%
  filter(18==Age) %>%
  select(id, Date, StateCode, YC, Pill, UsedPillBefore, UsedPillLastMonth, MarriedBefore) %>%
  rename(Date18=Date, StateCode18=StateCode, YC18=YC, Pill18=Pill, 
         UsedPillMonthBefore18=UsedPillLastMonth, UsedPillBefore18=UsedPillBefore, 
         MarriedBefore18=MarriedBefore)
Age19 = 
  panel %>%
  filter(19==Age) %>%
  select(id, Date, StateCode, YC, Pill, UsedPillBefore, UsedPillLastMonth, MarriedBefore) %>%
  rename(Date19=Date, StateCode19=StateCode, YC19=YC, Pill19=Pill, 
         UsedPillMonthBefore19=UsedPillLastMonth, UsedPillBefore19=UsedPillBefore, 
         MarriedBefore19=MarriedBefore)
Age20 = 
  panel %>%
  filter(20==Age) %>%
  select(id, Date, StateCode, YC, Pill, UsedPillBefore, UsedPillLastMonth, MarriedBefore) %>%
  rename(Date20=Date, StateCode20=StateCode, YC20=YC, Pill20=Pill, 
         UsedPillMonthBefore20=UsedPillLastMonth, UsedPillBefore20=UsedPillBefore, 
         MarriedBefore20=MarriedBefore)
Age21 = 
  panel %>%
  filter(21==Age) %>%
  select(id, Date, StateCode, YC, Pill, UsedPillBefore, UsedPillLastMonth, MarriedBefore) %>%
  rename(Date21=Date, StateCode21=StateCode, YC21=YC, Pill21=Pill, 
         UsedPillMonthBefore21=UsedPillLastMonth, UsedPillBefore21=UsedPillBefore, 
         MarriedBefore21=MarriedBefore)
Age22 = 
  panel %>%
  filter(22==Age) %>%
  select(id, Date, StateCode, YC, Pill, UsedPillBefore, UsedPillLastMonth, MarriedBefore) %>%
  rename(Date22=Date, StateCode22=StateCode, YC22=YC, Pill22=Pill, 
         UsedPillMonthBefore22=UsedPillLastMonth, UsedPillBefore22=UsedPillBefore, 
         MarriedBefore22=MarriedBefore)

data1820 = 
  panel %>%
  filter(18<=Age & Age<21) %>%
  group_by(id) %>%
  summarise(YCprop = mean(YC),
            Pill1820Ever = max(Pill, na.rm=T),
            StateMode1820 = Mode(StateCode))

# Clean up
RemoveList = c("Over21", "homepanel", "panelplm", "xHomes", "xState",
               "xMoveMonth", "xMoveMonthFinish", "xMoveYear", "xMoveYearFinish",
               "Contra", "ContraTemp", "PillPanel", "PillPanelTemp",
               "ContraFinish", "ContraFinishMonth", "ContraFinishYear",
               "ContraStart", "ContraStartMonth", "ContraStartYear",
               "ContraType1", "ContraType2", "AgeGroups", "AgeGroups2",
               "CondomPanel", "CondomPanelTemp", "IUDPanel", "IUDPanelTemp",
               "br2", "br","br2Births","br2Pop","br2Rate", "brus",
               "c1986", "c1991", "c1996","c2001","c2006",
               "ChildBirthDatesWide", "ChildBirthMonthsWide", "ChildBirthYearsWide",
               "ChildBirthMonths","ChildBirthYears","ChildBirthsPanel", "ChildTypes", 
               "LossTypes", "LossTimes", "LossCounts", "LossCountsWide", "LossTimesWide",
               "HSF", "HSF_temp", "HSF1", 
               "hazard_Born", "OISample", "counts",
               "PregnancyDates", "BirthDates",
               # "x",
               "reglmAgeAfterMonths_born", "reglmAgeAfterMonths_year", 
               "reglmAgeAfterYears_born", "reglmAgeAfterYears_year",
               # "Age18", "Age21", "data1820",
               "Pill1820", "PillProp", "Preg", "YC", "AgeBirths", "AgeBirths1", "AgePop",
               "mfx", "mfx_Married", "mfx_Married_sep", "mfx_sep", "mfx_Unmarried", "mfx_Unmarried_sep",
               "BornCounts", "CountLabels", "CountLabelsNT", "CountLabelsOverseas",
               "idtemp2Moves", "idtemp3Moves", "idtemp4Moves", "idtemp5Moves",
               "OIPillForm", "OIPillForm1", "OIPillForm2", "OIPillForm3", "OIPillForm4", "OIPillForm5",
               "PillFormula", "PillFormula18",
               "PregForm18", "PregForm19", "PregForm20", "PregForm21", "PregForm22", "PregForm23", "PregForm24",
               "ChildBirthDatesWide", "ChildBirthMonths", "ChildBirthMonthsWide", 
               "ChildBirthsPanel", "ChildBirthYears", "ChildBirthYearsWide", "ChildTypes", 
               "LossCounts", "LossCountsWide", "LossTimes", "LossTimesWide", "LossTypes",
               "fMovesFirstTimeThisYear",
               "fMovedIn", "fMovedInCorrected", "fMovedInMonth", "fMovedInYear",
               "fHomes", "fMoveMonth", "fMoveMonthFinish", "fMoveYear", "fMoveYearFinish", "fMoves",
               "fMovesPanel", "fMovesFull", "fMovesSmall", "Probs", 
               "RepeatedHomes", "CombinedMoves", "CombinedMovesMultiHome",
               "fMarriageMonth", "fMarriageYear", "fRelationshipOver", "fRelationshipType", 
               "IUDDateFirst", "PillDateFirst", "CondomDateFirst", 
               "ContraPanel", "ContraUsedAgain", 
               "idRepeats", "FinishAmbiguityID", "PillDrop")
for(rmName in RemoveList){
  try(rm(list=rmName), silent=T)
}
rm(rmName)
rm(RemoveList)

# save.image( file="AFP_full.RData")
save(x,     file="AFP_x.RData")
save(panel, file="AFP_panel.RData")



